| /* |
| * Copyright (C) 2014 Intel Corporation |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
| * IN THE SOFTWARE. |
| * |
| * Authors: |
| * Jason Ekstrand (jason@jlekstrand.net) |
| */ |
| |
| #include <math.h> |
| #include "util/rounding.h" /* for _mesa_roundeven */ |
| #include "util/half_float.h" |
| #include "util/double.h" |
| #include "util/softfloat.h" |
| #include "util/bigmath.h" |
| #include "nir_constant_expressions.h" |
| |
/* Largest value representable by an unsigned integer of `bits` width
 * (valid for 1 <= bits <= 64; a 64-bit request shifts by zero).
 */
#define MAX_UINT_FOR_SIZE(bits) (UINT64_MAX >> (64 - (bits)))
| |
| /** |
| * rief Checks if the provided value is a denorm and flushes it to zero. |
| */ |
| static void |
| constant_denorm_flush_to_zero(nir_const_value *value, unsigned bit_size) |
| { |
| switch(bit_size) { |
| case 64: |
| if (0 == (value->u64 & 0x7ff0000000000000)) |
| value->u64 &= 0x8000000000000000; |
| break; |
| case 32: |
| if (0 == (value->u32 & 0x7f800000)) |
| value->u32 &= 0x80000000; |
| break; |
| case 16: |
| if (0 == (value->u16 & 0x7c00)) |
| value->u16 &= 0x8000; |
| } |
| } |
| |
| /** |
| * Evaluate one component of packSnorm4x8. |
| */ |
| static uint8_t |
| pack_snorm_1x8(float x) |
| { |
| /* From section 8.4 of the GLSL 4.30 spec: |
| * |
| * packSnorm4x8 |
| * ------------ |
| * The conversion for component c of v to fixed point is done as |
| * follows: |
| * |
| * packSnorm4x8: round(clamp(c, -1, +1) * 127.0) |
| * |
| * We must first cast the float to an int, because casting a negative |
| * float to a uint is undefined. |
| */ |
| return (uint8_t) (int) |
| _mesa_roundevenf(CLAMP(x, -1.0f, +1.0f) * 127.0f); |
| } |
| |
| /** |
| * Evaluate one component of packSnorm2x16. |
| */ |
| static uint16_t |
| pack_snorm_1x16(float x) |
| { |
| /* From section 8.4 of the GLSL ES 3.00 spec: |
| * |
| * packSnorm2x16 |
| * ------------- |
| * The conversion for component c of v to fixed point is done as |
| * follows: |
| * |
| * packSnorm2x16: round(clamp(c, -1, +1) * 32767.0) |
| * |
| * We must first cast the float to an int, because casting a negative |
| * float to a uint is undefined. |
| */ |
| return (uint16_t) (int) |
| _mesa_roundevenf(CLAMP(x, -1.0f, +1.0f) * 32767.0f); |
| } |
| |
| /** |
| * Evaluate one component of unpackSnorm4x8. |
| */ |
| static float |
| unpack_snorm_1x8(uint8_t u) |
| { |
| /* From section 8.4 of the GLSL 4.30 spec: |
| * |
| * unpackSnorm4x8 |
| * -------------- |
| * The conversion for unpacked fixed-point value f to floating point is |
| * done as follows: |
| * |
| * unpackSnorm4x8: clamp(f / 127.0, -1, +1) |
| */ |
| return CLAMP((int8_t) u / 127.0f, -1.0f, +1.0f); |
| } |
| |
| /** |
| * Evaluate one component of unpackSnorm2x16. |
| */ |
| static float |
| unpack_snorm_1x16(uint16_t u) |
| { |
| /* From section 8.4 of the GLSL ES 3.00 spec: |
| * |
| * unpackSnorm2x16 |
| * --------------- |
| * The conversion for unpacked fixed-point value f to floating point is |
| * done as follows: |
| * |
| * unpackSnorm2x16: clamp(f / 32767.0, -1, +1) |
| */ |
| return CLAMP((int16_t) u / 32767.0f, -1.0f, +1.0f); |
| } |
| |
| /** |
| * Evaluate one component packUnorm4x8. |
| */ |
| static uint8_t |
| pack_unorm_1x8(float x) |
| { |
| /* From section 8.4 of the GLSL 4.30 spec: |
| * |
| * packUnorm4x8 |
| * ------------ |
| * The conversion for component c of v to fixed point is done as |
| * follows: |
| * |
| * packUnorm4x8: round(clamp(c, 0, +1) * 255.0) |
| */ |
| return (uint8_t) (int) |
| _mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 255.0f); |
| } |
| |
| /** |
| * Evaluate one component packUnorm2x16. |
| */ |
| static uint16_t |
| pack_unorm_1x16(float x) |
| { |
| /* From section 8.4 of the GLSL ES 3.00 spec: |
| * |
| * packUnorm2x16 |
| * ------------- |
| * The conversion for component c of v to fixed point is done as |
| * follows: |
| * |
| * packUnorm2x16: round(clamp(c, 0, +1) * 65535.0) |
| */ |
| return (uint16_t) (int) |
| _mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 65535.0f); |
| } |
| |
| /** |
| * Evaluate one component of unpackUnorm4x8. |
| */ |
| static float |
| unpack_unorm_1x8(uint8_t u) |
| { |
| /* From section 8.4 of the GLSL 4.30 spec: |
| * |
| * unpackUnorm4x8 |
| * -------------- |
| * The conversion for unpacked fixed-point value f to floating point is |
| * done as follows: |
| * |
| * unpackUnorm4x8: f / 255.0 |
| */ |
| return (float) u / 255.0f; |
| } |
| |
| /** |
| * Evaluate one component of unpackUnorm2x16. |
| */ |
| static float |
| unpack_unorm_1x16(uint16_t u) |
| { |
| /* From section 8.4 of the GLSL ES 3.00 spec: |
| * |
| * unpackUnorm2x16 |
| * --------------- |
| * The conversion for unpacked fixed-point value f to floating point is |
| * done as follows: |
| * |
| * unpackUnorm2x16: f / 65535.0 |
| */ |
| return (float) u / 65535.0f; |
| } |
| |
| /** |
| * Evaluate one component of packHalf2x16. |
| */ |
| static uint16_t |
| pack_half_1x16(float x) |
| { |
| return _mesa_float_to_half(x); |
| } |
| |
| /** |
| * Evaluate one component of unpackHalf2x16. |
| */ |
| static float |
| unpack_half_1x16_flush_to_zero(uint16_t u) |
| { |
| if (0 == (u & 0x7c00)) |
| u &= 0x8000; |
| return _mesa_half_to_float(u); |
| } |
| |
| /** |
| * Evaluate one component of unpackHalf2x16. |
| */ |
| static float |
| unpack_half_1x16(uint16_t u) |
| { |
| return _mesa_half_to_float(u); |
| } |
| |
/* Some typed vector structures so that swizzle-style accesses like
 * src0.y work in the generated expressions below.  Components map to
 * members x, y, z, w, e, f, ..., p (16 components total).
 *
 * 1-bit and bool types are stored widened: 1-bit integers in an 8-bit
 * integer, booleans of every size in a C bool.
 */
typedef int8_t int1_t;
typedef uint8_t uint1_t;
typedef float float16_t;
typedef float float32_t;
typedef double float64_t;
typedef bool bool1_t;
typedef bool bool8_t;
typedef bool bool16_t;
typedef bool bool32_t;
typedef bool bool64_t;

struct float16_vec {
   float16_t x, y, z, w, e, f, g, h, i, j, k, l, m, n, o, p;
};

struct float32_vec {
   float32_t x, y, z, w, e, f, g, h, i, j, k, l, m, n, o, p;
};

struct float64_vec {
   float64_t x, y, z, w, e, f, g, h, i, j, k, l, m, n, o, p;
};

struct int1_vec {
   int1_t x, y, z, w, e, f, g, h, i, j, k, l, m, n, o, p;
};

struct int8_vec {
   int8_t x, y, z, w, e, f, g, h, i, j, k, l, m, n, o, p;
};

struct int16_vec {
   int16_t x, y, z, w, e, f, g, h, i, j, k, l, m, n, o, p;
};

struct int32_vec {
   int32_t x, y, z, w, e, f, g, h, i, j, k, l, m, n, o, p;
};

struct int64_vec {
   int64_t x, y, z, w, e, f, g, h, i, j, k, l, m, n, o, p;
};

struct uint1_vec {
   uint1_t x, y, z, w, e, f, g, h, i, j, k, l, m, n, o, p;
};

struct uint8_vec {
   uint8_t x, y, z, w, e, f, g, h, i, j, k, l, m, n, o, p;
};

struct uint16_vec {
   uint16_t x, y, z, w, e, f, g, h, i, j, k, l, m, n, o, p;
};

struct uint32_vec {
   uint32_t x, y, z, w, e, f, g, h, i, j, k, l, m, n, o, p;
};

struct uint64_vec {
   uint64_t x, y, z, w, e, f, g, h, i, j, k, l, m, n, o, p;
};

struct bool1_vec {
   bool1_t x, y, z, w, e, f, g, h, i, j, k, l, m, n, o, p;
};

struct bool8_vec {
   bool8_t x, y, z, w, e, f, g, h, i, j, k, l, m, n, o, p;
};

struct bool16_vec {
   bool16_t x, y, z, w, e, f, g, h, i, j, k, l, m, n, o, p;
};

struct bool32_vec {
   bool32_t x, y, z, w, e, f, g, h, i, j, k, l, m, n, o, p;
};
| |
| |
| |
| static void |
| evaluate_amul(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| /* 1-bit integers use a 0/-1 convention */ |
| const int1_t src0 = -(int1_t)_src[0][_i].b; |
| /* 1-bit integers use a 0/-1 convention */ |
| const int1_t src1 = -(int1_t)_src[1][_i].b; |
| |
| int1_t dst = src0 * src1; |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[_i].b = dst & 1; |
| |
| } |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int8_t src0 = |
| _src[0][_i].i8; |
| const int8_t src1 = |
| _src[1][_i].i8; |
| |
| int8_t dst = src0 * src1; |
| |
| _dst_val[_i].i8 = dst; |
| |
| } |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int16_t src0 = |
| _src[0][_i].i16; |
| const int16_t src1 = |
| _src[1][_i].i16; |
| |
| int16_t dst = src0 * src1; |
| |
| _dst_val[_i].i16 = dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int32_t src0 = |
| _src[0][_i].i32; |
| const int32_t src1 = |
| _src[1][_i].i32; |
| |
| int32_t dst = src0 * src1; |
| |
| _dst_val[_i].i32 = dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int64_t src0 = |
| _src[0][_i].i64; |
| const int64_t src1 = |
| _src[1][_i].i64; |
| |
| int64_t dst = src0 * src1; |
| |
| _dst_val[_i].i64 = dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b16all_fequal16(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| const struct float16_vec src0 = { |
| _mesa_half_to_float(_src[0][0].u16), |
| _mesa_half_to_float(_src[0][1].u16), |
| _mesa_half_to_float(_src[0][2].u16), |
| _mesa_half_to_float(_src[0][3].u16), |
| _mesa_half_to_float(_src[0][4].u16), |
| _mesa_half_to_float(_src[0][5].u16), |
| _mesa_half_to_float(_src[0][6].u16), |
| _mesa_half_to_float(_src[0][7].u16), |
| _mesa_half_to_float(_src[0][8].u16), |
| _mesa_half_to_float(_src[0][9].u16), |
| _mesa_half_to_float(_src[0][10].u16), |
| _mesa_half_to_float(_src[0][11].u16), |
| _mesa_half_to_float(_src[0][12].u16), |
| _mesa_half_to_float(_src[0][13].u16), |
| _mesa_half_to_float(_src[0][14].u16), |
| _mesa_half_to_float(_src[0][15].u16), |
| }; |
| |
| const struct float16_vec src1 = { |
| _mesa_half_to_float(_src[1][0].u16), |
| _mesa_half_to_float(_src[1][1].u16), |
| _mesa_half_to_float(_src[1][2].u16), |
| _mesa_half_to_float(_src[1][3].u16), |
| _mesa_half_to_float(_src[1][4].u16), |
| _mesa_half_to_float(_src[1][5].u16), |
| _mesa_half_to_float(_src[1][6].u16), |
| _mesa_half_to_float(_src[1][7].u16), |
| _mesa_half_to_float(_src[1][8].u16), |
| _mesa_half_to_float(_src[1][9].u16), |
| _mesa_half_to_float(_src[1][10].u16), |
| _mesa_half_to_float(_src[1][11].u16), |
| _mesa_half_to_float(_src[1][12].u16), |
| _mesa_half_to_float(_src[1][13].u16), |
| _mesa_half_to_float(_src[1][14].u16), |
| _mesa_half_to_float(_src[1][15].u16), |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p == src1.p) && (src0.o == src1.o) && (src0.n == src1.n) && (src0.m == src1.m) && (src0.l == src1.l) && (src0.k == src1.k) && (src0.j == src1.j) && (src0.i == src1.i) && (src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| _src[0][2].f32, |
| _src[0][3].f32, |
| _src[0][4].f32, |
| _src[0][5].f32, |
| _src[0][6].f32, |
| _src[0][7].f32, |
| _src[0][8].f32, |
| _src[0][9].f32, |
| _src[0][10].f32, |
| _src[0][11].f32, |
| _src[0][12].f32, |
| _src[0][13].f32, |
| _src[0][14].f32, |
| _src[0][15].f32, |
| }; |
| |
| const struct float32_vec src1 = { |
| _src[1][0].f32, |
| _src[1][1].f32, |
| _src[1][2].f32, |
| _src[1][3].f32, |
| _src[1][4].f32, |
| _src[1][5].f32, |
| _src[1][6].f32, |
| _src[1][7].f32, |
| _src[1][8].f32, |
| _src[1][9].f32, |
| _src[1][10].f32, |
| _src[1][11].f32, |
| _src[1][12].f32, |
| _src[1][13].f32, |
| _src[1][14].f32, |
| _src[1][15].f32, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p == src1.p) && (src0.o == src1.o) && (src0.n == src1.n) && (src0.m == src1.m) && (src0.l == src1.l) && (src0.k == src1.k) && (src0.j == src1.j) && (src0.i == src1.i) && (src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct float64_vec src0 = { |
| _src[0][0].f64, |
| _src[0][1].f64, |
| _src[0][2].f64, |
| _src[0][3].f64, |
| _src[0][4].f64, |
| _src[0][5].f64, |
| _src[0][6].f64, |
| _src[0][7].f64, |
| _src[0][8].f64, |
| _src[0][9].f64, |
| _src[0][10].f64, |
| _src[0][11].f64, |
| _src[0][12].f64, |
| _src[0][13].f64, |
| _src[0][14].f64, |
| _src[0][15].f64, |
| }; |
| |
| const struct float64_vec src1 = { |
| _src[1][0].f64, |
| _src[1][1].f64, |
| _src[1][2].f64, |
| _src[1][3].f64, |
| _src[1][4].f64, |
| _src[1][5].f64, |
| _src[1][6].f64, |
| _src[1][7].f64, |
| _src[1][8].f64, |
| _src[1][9].f64, |
| _src[1][10].f64, |
| _src[1][11].f64, |
| _src[1][12].f64, |
| _src[1][13].f64, |
| _src[1][14].f64, |
| _src[1][15].f64, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p == src1.p) && (src0.o == src1.o) && (src0.n == src1.n) && (src0.m == src1.m) && (src0.l == src1.l) && (src0.k == src1.k) && (src0.j == src1.j) && (src0.i == src1.i) && (src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b16all_fequal2(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| const struct float16_vec src0 = { |
| _mesa_half_to_float(_src[0][0].u16), |
| _mesa_half_to_float(_src[0][1].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float16_vec src1 = { |
| _mesa_half_to_float(_src[1][0].u16), |
| _mesa_half_to_float(_src[1][1].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float32_vec src1 = { |
| _src[1][0].f32, |
| _src[1][1].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct float64_vec src0 = { |
| _src[0][0].f64, |
| _src[0][1].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float64_vec src1 = { |
| _src[1][0].f64, |
| _src[1][1].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b16all_fequal3(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| const struct float16_vec src0 = { |
| _mesa_half_to_float(_src[0][0].u16), |
| _mesa_half_to_float(_src[0][1].u16), |
| _mesa_half_to_float(_src[0][2].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float16_vec src1 = { |
| _mesa_half_to_float(_src[1][0].u16), |
| _mesa_half_to_float(_src[1][1].u16), |
| _mesa_half_to_float(_src[1][2].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| _src[0][2].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float32_vec src1 = { |
| _src[1][0].f32, |
| _src[1][1].f32, |
| _src[1][2].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct float64_vec src0 = { |
| _src[0][0].f64, |
| _src[0][1].f64, |
| _src[0][2].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float64_vec src1 = { |
| _src[1][0].f64, |
| _src[1][1].f64, |
| _src[1][2].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b16all_fequal4(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| const struct float16_vec src0 = { |
| _mesa_half_to_float(_src[0][0].u16), |
| _mesa_half_to_float(_src[0][1].u16), |
| _mesa_half_to_float(_src[0][2].u16), |
| _mesa_half_to_float(_src[0][3].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float16_vec src1 = { |
| _mesa_half_to_float(_src[1][0].u16), |
| _mesa_half_to_float(_src[1][1].u16), |
| _mesa_half_to_float(_src[1][2].u16), |
| _mesa_half_to_float(_src[1][3].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| _src[0][2].f32, |
| _src[0][3].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float32_vec src1 = { |
| _src[1][0].f32, |
| _src[1][1].f32, |
| _src[1][2].f32, |
| _src[1][3].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct float64_vec src0 = { |
| _src[0][0].f64, |
| _src[0][1].f64, |
| _src[0][2].f64, |
| _src[0][3].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float64_vec src1 = { |
| _src[1][0].f64, |
| _src[1][1].f64, |
| _src[1][2].f64, |
| _src[1][3].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b16all_fequal8(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| const struct float16_vec src0 = { |
| _mesa_half_to_float(_src[0][0].u16), |
| _mesa_half_to_float(_src[0][1].u16), |
| _mesa_half_to_float(_src[0][2].u16), |
| _mesa_half_to_float(_src[0][3].u16), |
| _mesa_half_to_float(_src[0][4].u16), |
| _mesa_half_to_float(_src[0][5].u16), |
| _mesa_half_to_float(_src[0][6].u16), |
| _mesa_half_to_float(_src[0][7].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float16_vec src1 = { |
| _mesa_half_to_float(_src[1][0].u16), |
| _mesa_half_to_float(_src[1][1].u16), |
| _mesa_half_to_float(_src[1][2].u16), |
| _mesa_half_to_float(_src[1][3].u16), |
| _mesa_half_to_float(_src[1][4].u16), |
| _mesa_half_to_float(_src[1][5].u16), |
| _mesa_half_to_float(_src[1][6].u16), |
| _mesa_half_to_float(_src[1][7].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| _src[0][2].f32, |
| _src[0][3].f32, |
| _src[0][4].f32, |
| _src[0][5].f32, |
| _src[0][6].f32, |
| _src[0][7].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float32_vec src1 = { |
| _src[1][0].f32, |
| _src[1][1].f32, |
| _src[1][2].f32, |
| _src[1][3].f32, |
| _src[1][4].f32, |
| _src[1][5].f32, |
| _src[1][6].f32, |
| _src[1][7].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct float64_vec src0 = { |
| _src[0][0].f64, |
| _src[0][1].f64, |
| _src[0][2].f64, |
| _src[0][3].f64, |
| _src[0][4].f64, |
| _src[0][5].f64, |
| _src[0][6].f64, |
| _src[0][7].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float64_vec src1 = { |
| _src[1][0].f64, |
| _src[1][1].f64, |
| _src[1][2].f64, |
| _src[1][3].f64, |
| _src[1][4].f64, |
| _src[1][5].f64, |
| _src[1][6].f64, |
| _src[1][7].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b16all_iequal16(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| const struct int1_vec src0 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][1].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][2].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][3].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][4].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][5].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][6].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][7].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][8].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][9].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][10].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][11].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][12].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][13].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][14].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][15].b, |
| }; |
| |
| const struct int1_vec src1 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][1].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][2].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][3].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][4].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][5].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][6].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][7].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][8].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][9].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][10].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][11].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][12].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][13].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][14].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][15].b, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p == src1.p) && (src0.o == src1.o) && (src0.n == src1.n) && (src0.m == src1.m) && (src0.l == src1.l) && (src0.k == src1.k) && (src0.j == src1.j) && (src0.i == src1.i) && (src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| const struct int8_vec src0 = { |
| _src[0][0].i8, |
| _src[0][1].i8, |
| _src[0][2].i8, |
| _src[0][3].i8, |
| _src[0][4].i8, |
| _src[0][5].i8, |
| _src[0][6].i8, |
| _src[0][7].i8, |
| _src[0][8].i8, |
| _src[0][9].i8, |
| _src[0][10].i8, |
| _src[0][11].i8, |
| _src[0][12].i8, |
| _src[0][13].i8, |
| _src[0][14].i8, |
| _src[0][15].i8, |
| }; |
| |
| const struct int8_vec src1 = { |
| _src[1][0].i8, |
| _src[1][1].i8, |
| _src[1][2].i8, |
| _src[1][3].i8, |
| _src[1][4].i8, |
| _src[1][5].i8, |
| _src[1][6].i8, |
| _src[1][7].i8, |
| _src[1][8].i8, |
| _src[1][9].i8, |
| _src[1][10].i8, |
| _src[1][11].i8, |
| _src[1][12].i8, |
| _src[1][13].i8, |
| _src[1][14].i8, |
| _src[1][15].i8, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p == src1.p) && (src0.o == src1.o) && (src0.n == src1.n) && (src0.m == src1.m) && (src0.l == src1.l) && (src0.k == src1.k) && (src0.j == src1.j) && (src0.i == src1.i) && (src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| const struct int16_vec src0 = { |
| _src[0][0].i16, |
| _src[0][1].i16, |
| _src[0][2].i16, |
| _src[0][3].i16, |
| _src[0][4].i16, |
| _src[0][5].i16, |
| _src[0][6].i16, |
| _src[0][7].i16, |
| _src[0][8].i16, |
| _src[0][9].i16, |
| _src[0][10].i16, |
| _src[0][11].i16, |
| _src[0][12].i16, |
| _src[0][13].i16, |
| _src[0][14].i16, |
| _src[0][15].i16, |
| }; |
| |
| const struct int16_vec src1 = { |
| _src[1][0].i16, |
| _src[1][1].i16, |
| _src[1][2].i16, |
| _src[1][3].i16, |
| _src[1][4].i16, |
| _src[1][5].i16, |
| _src[1][6].i16, |
| _src[1][7].i16, |
| _src[1][8].i16, |
| _src[1][9].i16, |
| _src[1][10].i16, |
| _src[1][11].i16, |
| _src[1][12].i16, |
| _src[1][13].i16, |
| _src[1][14].i16, |
| _src[1][15].i16, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p == src1.p) && (src0.o == src1.o) && (src0.n == src1.n) && (src0.m == src1.m) && (src0.l == src1.l) && (src0.k == src1.k) && (src0.j == src1.j) && (src0.i == src1.i) && (src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct int32_vec src0 = { |
| _src[0][0].i32, |
| _src[0][1].i32, |
| _src[0][2].i32, |
| _src[0][3].i32, |
| _src[0][4].i32, |
| _src[0][5].i32, |
| _src[0][6].i32, |
| _src[0][7].i32, |
| _src[0][8].i32, |
| _src[0][9].i32, |
| _src[0][10].i32, |
| _src[0][11].i32, |
| _src[0][12].i32, |
| _src[0][13].i32, |
| _src[0][14].i32, |
| _src[0][15].i32, |
| }; |
| |
| const struct int32_vec src1 = { |
| _src[1][0].i32, |
| _src[1][1].i32, |
| _src[1][2].i32, |
| _src[1][3].i32, |
| _src[1][4].i32, |
| _src[1][5].i32, |
| _src[1][6].i32, |
| _src[1][7].i32, |
| _src[1][8].i32, |
| _src[1][9].i32, |
| _src[1][10].i32, |
| _src[1][11].i32, |
| _src[1][12].i32, |
| _src[1][13].i32, |
| _src[1][14].i32, |
| _src[1][15].i32, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p == src1.p) && (src0.o == src1.o) && (src0.n == src1.n) && (src0.m == src1.m) && (src0.l == src1.l) && (src0.k == src1.k) && (src0.j == src1.j) && (src0.i == src1.i) && (src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct int64_vec src0 = { |
| _src[0][0].i64, |
| _src[0][1].i64, |
| _src[0][2].i64, |
| _src[0][3].i64, |
| _src[0][4].i64, |
| _src[0][5].i64, |
| _src[0][6].i64, |
| _src[0][7].i64, |
| _src[0][8].i64, |
| _src[0][9].i64, |
| _src[0][10].i64, |
| _src[0][11].i64, |
| _src[0][12].i64, |
| _src[0][13].i64, |
| _src[0][14].i64, |
| _src[0][15].i64, |
| }; |
| |
| const struct int64_vec src1 = { |
| _src[1][0].i64, |
| _src[1][1].i64, |
| _src[1][2].i64, |
| _src[1][3].i64, |
| _src[1][4].i64, |
| _src[1][5].i64, |
| _src[1][6].i64, |
| _src[1][7].i64, |
| _src[1][8].i64, |
| _src[1][9].i64, |
| _src[1][10].i64, |
| _src[1][11].i64, |
| _src[1][12].i64, |
| _src[1][13].i64, |
| _src[1][14].i64, |
| _src[1][15].i64, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p == src1.p) && (src0.o == src1.o) && (src0.n == src1.n) && (src0.m == src1.m) && (src0.l == src1.l) && (src0.k == src1.k) && (src0.j == src1.j) && (src0.i == src1.i) && (src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b16all_iequal2(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| const struct int1_vec src0 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][1].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int1_vec src1 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][1].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| const struct int8_vec src0 = { |
| _src[0][0].i8, |
| _src[0][1].i8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int8_vec src1 = { |
| _src[1][0].i8, |
| _src[1][1].i8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| const struct int16_vec src0 = { |
| _src[0][0].i16, |
| _src[0][1].i16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int16_vec src1 = { |
| _src[1][0].i16, |
| _src[1][1].i16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct int32_vec src0 = { |
| _src[0][0].i32, |
| _src[0][1].i32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int32_vec src1 = { |
| _src[1][0].i32, |
| _src[1][1].i32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct int64_vec src0 = { |
| _src[0][0].i64, |
| _src[0][1].i64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int64_vec src1 = { |
| _src[1][0].i64, |
| _src[1][1].i64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b16all_iequal3(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| const struct int1_vec src0 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][1].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][2].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int1_vec src1 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][1].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][2].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| const struct int8_vec src0 = { |
| _src[0][0].i8, |
| _src[0][1].i8, |
| _src[0][2].i8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int8_vec src1 = { |
| _src[1][0].i8, |
| _src[1][1].i8, |
| _src[1][2].i8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| const struct int16_vec src0 = { |
| _src[0][0].i16, |
| _src[0][1].i16, |
| _src[0][2].i16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int16_vec src1 = { |
| _src[1][0].i16, |
| _src[1][1].i16, |
| _src[1][2].i16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct int32_vec src0 = { |
| _src[0][0].i32, |
| _src[0][1].i32, |
| _src[0][2].i32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int32_vec src1 = { |
| _src[1][0].i32, |
| _src[1][1].i32, |
| _src[1][2].i32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct int64_vec src0 = { |
| _src[0][0].i64, |
| _src[0][1].i64, |
| _src[0][2].i64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int64_vec src1 = { |
| _src[1][0].i64, |
| _src[1][1].i64, |
| _src[1][2].i64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b16all_iequal4(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| const struct int1_vec src0 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][1].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][2].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][3].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int1_vec src1 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][1].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][2].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][3].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| const struct int8_vec src0 = { |
| _src[0][0].i8, |
| _src[0][1].i8, |
| _src[0][2].i8, |
| _src[0][3].i8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int8_vec src1 = { |
| _src[1][0].i8, |
| _src[1][1].i8, |
| _src[1][2].i8, |
| _src[1][3].i8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| const struct int16_vec src0 = { |
| _src[0][0].i16, |
| _src[0][1].i16, |
| _src[0][2].i16, |
| _src[0][3].i16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int16_vec src1 = { |
| _src[1][0].i16, |
| _src[1][1].i16, |
| _src[1][2].i16, |
| _src[1][3].i16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct int32_vec src0 = { |
| _src[0][0].i32, |
| _src[0][1].i32, |
| _src[0][2].i32, |
| _src[0][3].i32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int32_vec src1 = { |
| _src[1][0].i32, |
| _src[1][1].i32, |
| _src[1][2].i32, |
| _src[1][3].i32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct int64_vec src0 = { |
| _src[0][0].i64, |
| _src[0][1].i64, |
| _src[0][2].i64, |
| _src[0][3].i64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int64_vec src1 = { |
| _src[1][0].i64, |
| _src[1][1].i64, |
| _src[1][2].i64, |
| _src[1][3].i64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b16all_iequal8(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| const struct int1_vec src0 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][1].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][2].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][3].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][4].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][5].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][6].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][7].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int1_vec src1 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][1].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][2].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][3].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][4].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][5].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][6].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][7].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| const struct int8_vec src0 = { |
| _src[0][0].i8, |
| _src[0][1].i8, |
| _src[0][2].i8, |
| _src[0][3].i8, |
| _src[0][4].i8, |
| _src[0][5].i8, |
| _src[0][6].i8, |
| _src[0][7].i8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int8_vec src1 = { |
| _src[1][0].i8, |
| _src[1][1].i8, |
| _src[1][2].i8, |
| _src[1][3].i8, |
| _src[1][4].i8, |
| _src[1][5].i8, |
| _src[1][6].i8, |
| _src[1][7].i8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| const struct int16_vec src0 = { |
| _src[0][0].i16, |
| _src[0][1].i16, |
| _src[0][2].i16, |
| _src[0][3].i16, |
| _src[0][4].i16, |
| _src[0][5].i16, |
| _src[0][6].i16, |
| _src[0][7].i16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int16_vec src1 = { |
| _src[1][0].i16, |
| _src[1][1].i16, |
| _src[1][2].i16, |
| _src[1][3].i16, |
| _src[1][4].i16, |
| _src[1][5].i16, |
| _src[1][6].i16, |
| _src[1][7].i16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct int32_vec src0 = { |
| _src[0][0].i32, |
| _src[0][1].i32, |
| _src[0][2].i32, |
| _src[0][3].i32, |
| _src[0][4].i32, |
| _src[0][5].i32, |
| _src[0][6].i32, |
| _src[0][7].i32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int32_vec src1 = { |
| _src[1][0].i32, |
| _src[1][1].i32, |
| _src[1][2].i32, |
| _src[1][3].i32, |
| _src[1][4].i32, |
| _src[1][5].i32, |
| _src[1][6].i32, |
| _src[1][7].i32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct int64_vec src0 = { |
| _src[0][0].i64, |
| _src[0][1].i64, |
| _src[0][2].i64, |
| _src[0][3].i64, |
| _src[0][4].i64, |
| _src[0][5].i64, |
| _src[0][6].i64, |
| _src[0][7].i64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int64_vec src1 = { |
| _src[1][0].i64, |
| _src[1][1].i64, |
| _src[1][2].i64, |
| _src[1][3].i64, |
| _src[1][4].i64, |
| _src[1][5].i64, |
| _src[1][6].i64, |
| _src[1][7].i64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b16any_fnequal16(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| const struct float16_vec src0 = { |
| _mesa_half_to_float(_src[0][0].u16), |
| _mesa_half_to_float(_src[0][1].u16), |
| _mesa_half_to_float(_src[0][2].u16), |
| _mesa_half_to_float(_src[0][3].u16), |
| _mesa_half_to_float(_src[0][4].u16), |
| _mesa_half_to_float(_src[0][5].u16), |
| _mesa_half_to_float(_src[0][6].u16), |
| _mesa_half_to_float(_src[0][7].u16), |
| _mesa_half_to_float(_src[0][8].u16), |
| _mesa_half_to_float(_src[0][9].u16), |
| _mesa_half_to_float(_src[0][10].u16), |
| _mesa_half_to_float(_src[0][11].u16), |
| _mesa_half_to_float(_src[0][12].u16), |
| _mesa_half_to_float(_src[0][13].u16), |
| _mesa_half_to_float(_src[0][14].u16), |
| _mesa_half_to_float(_src[0][15].u16), |
| }; |
| |
| const struct float16_vec src1 = { |
| _mesa_half_to_float(_src[1][0].u16), |
| _mesa_half_to_float(_src[1][1].u16), |
| _mesa_half_to_float(_src[1][2].u16), |
| _mesa_half_to_float(_src[1][3].u16), |
| _mesa_half_to_float(_src[1][4].u16), |
| _mesa_half_to_float(_src[1][5].u16), |
| _mesa_half_to_float(_src[1][6].u16), |
| _mesa_half_to_float(_src[1][7].u16), |
| _mesa_half_to_float(_src[1][8].u16), |
| _mesa_half_to_float(_src[1][9].u16), |
| _mesa_half_to_float(_src[1][10].u16), |
| _mesa_half_to_float(_src[1][11].u16), |
| _mesa_half_to_float(_src[1][12].u16), |
| _mesa_half_to_float(_src[1][13].u16), |
| _mesa_half_to_float(_src[1][14].u16), |
| _mesa_half_to_float(_src[1][15].u16), |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p != src1.p) || (src0.o != src1.o) || (src0.n != src1.n) || (src0.m != src1.m) || (src0.l != src1.l) || (src0.k != src1.k) || (src0.j != src1.j) || (src0.i != src1.i) || (src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| _src[0][2].f32, |
| _src[0][3].f32, |
| _src[0][4].f32, |
| _src[0][5].f32, |
| _src[0][6].f32, |
| _src[0][7].f32, |
| _src[0][8].f32, |
| _src[0][9].f32, |
| _src[0][10].f32, |
| _src[0][11].f32, |
| _src[0][12].f32, |
| _src[0][13].f32, |
| _src[0][14].f32, |
| _src[0][15].f32, |
| }; |
| |
| const struct float32_vec src1 = { |
| _src[1][0].f32, |
| _src[1][1].f32, |
| _src[1][2].f32, |
| _src[1][3].f32, |
| _src[1][4].f32, |
| _src[1][5].f32, |
| _src[1][6].f32, |
| _src[1][7].f32, |
| _src[1][8].f32, |
| _src[1][9].f32, |
| _src[1][10].f32, |
| _src[1][11].f32, |
| _src[1][12].f32, |
| _src[1][13].f32, |
| _src[1][14].f32, |
| _src[1][15].f32, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p != src1.p) || (src0.o != src1.o) || (src0.n != src1.n) || (src0.m != src1.m) || (src0.l != src1.l) || (src0.k != src1.k) || (src0.j != src1.j) || (src0.i != src1.i) || (src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct float64_vec src0 = { |
| _src[0][0].f64, |
| _src[0][1].f64, |
| _src[0][2].f64, |
| _src[0][3].f64, |
| _src[0][4].f64, |
| _src[0][5].f64, |
| _src[0][6].f64, |
| _src[0][7].f64, |
| _src[0][8].f64, |
| _src[0][9].f64, |
| _src[0][10].f64, |
| _src[0][11].f64, |
| _src[0][12].f64, |
| _src[0][13].f64, |
| _src[0][14].f64, |
| _src[0][15].f64, |
| }; |
| |
| const struct float64_vec src1 = { |
| _src[1][0].f64, |
| _src[1][1].f64, |
| _src[1][2].f64, |
| _src[1][3].f64, |
| _src[1][4].f64, |
| _src[1][5].f64, |
| _src[1][6].f64, |
| _src[1][7].f64, |
| _src[1][8].f64, |
| _src[1][9].f64, |
| _src[1][10].f64, |
| _src[1][11].f64, |
| _src[1][12].f64, |
| _src[1][13].f64, |
| _src[1][14].f64, |
| _src[1][15].f64, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p != src1.p) || (src0.o != src1.o) || (src0.n != src1.n) || (src0.m != src1.m) || (src0.l != src1.l) || (src0.k != src1.k) || (src0.j != src1.j) || (src0.i != src1.i) || (src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b16any_fnequal2(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| const struct float16_vec src0 = { |
| _mesa_half_to_float(_src[0][0].u16), |
| _mesa_half_to_float(_src[0][1].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float16_vec src1 = { |
| _mesa_half_to_float(_src[1][0].u16), |
| _mesa_half_to_float(_src[1][1].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float32_vec src1 = { |
| _src[1][0].f32, |
| _src[1][1].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct float64_vec src0 = { |
| _src[0][0].f64, |
| _src[0][1].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float64_vec src1 = { |
| _src[1][0].f64, |
| _src[1][1].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b16any_fnequal3(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| const struct float16_vec src0 = { |
| _mesa_half_to_float(_src[0][0].u16), |
| _mesa_half_to_float(_src[0][1].u16), |
| _mesa_half_to_float(_src[0][2].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float16_vec src1 = { |
| _mesa_half_to_float(_src[1][0].u16), |
| _mesa_half_to_float(_src[1][1].u16), |
| _mesa_half_to_float(_src[1][2].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| _src[0][2].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float32_vec src1 = { |
| _src[1][0].f32, |
| _src[1][1].f32, |
| _src[1][2].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct float64_vec src0 = { |
| _src[0][0].f64, |
| _src[0][1].f64, |
| _src[0][2].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float64_vec src1 = { |
| _src[1][0].f64, |
| _src[1][1].f64, |
| _src[1][2].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b16any_fnequal4(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| const struct float16_vec src0 = { |
| _mesa_half_to_float(_src[0][0].u16), |
| _mesa_half_to_float(_src[0][1].u16), |
| _mesa_half_to_float(_src[0][2].u16), |
| _mesa_half_to_float(_src[0][3].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float16_vec src1 = { |
| _mesa_half_to_float(_src[1][0].u16), |
| _mesa_half_to_float(_src[1][1].u16), |
| _mesa_half_to_float(_src[1][2].u16), |
| _mesa_half_to_float(_src[1][3].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| _src[0][2].f32, |
| _src[0][3].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float32_vec src1 = { |
| _src[1][0].f32, |
| _src[1][1].f32, |
| _src[1][2].f32, |
| _src[1][3].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct float64_vec src0 = { |
| _src[0][0].f64, |
| _src[0][1].f64, |
| _src[0][2].f64, |
| _src[0][3].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float64_vec src1 = { |
| _src[1][0].f64, |
| _src[1][1].f64, |
| _src[1][2].f64, |
| _src[1][3].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b16any_fnequal8(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| const struct float16_vec src0 = { |
| _mesa_half_to_float(_src[0][0].u16), |
| _mesa_half_to_float(_src[0][1].u16), |
| _mesa_half_to_float(_src[0][2].u16), |
| _mesa_half_to_float(_src[0][3].u16), |
| _mesa_half_to_float(_src[0][4].u16), |
| _mesa_half_to_float(_src[0][5].u16), |
| _mesa_half_to_float(_src[0][6].u16), |
| _mesa_half_to_float(_src[0][7].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float16_vec src1 = { |
| _mesa_half_to_float(_src[1][0].u16), |
| _mesa_half_to_float(_src[1][1].u16), |
| _mesa_half_to_float(_src[1][2].u16), |
| _mesa_half_to_float(_src[1][3].u16), |
| _mesa_half_to_float(_src[1][4].u16), |
| _mesa_half_to_float(_src[1][5].u16), |
| _mesa_half_to_float(_src[1][6].u16), |
| _mesa_half_to_float(_src[1][7].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| _src[0][2].f32, |
| _src[0][3].f32, |
| _src[0][4].f32, |
| _src[0][5].f32, |
| _src[0][6].f32, |
| _src[0][7].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float32_vec src1 = { |
| _src[1][0].f32, |
| _src[1][1].f32, |
| _src[1][2].f32, |
| _src[1][3].f32, |
| _src[1][4].f32, |
| _src[1][5].f32, |
| _src[1][6].f32, |
| _src[1][7].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct float64_vec src0 = { |
| _src[0][0].f64, |
| _src[0][1].f64, |
| _src[0][2].f64, |
| _src[0][3].f64, |
| _src[0][4].f64, |
| _src[0][5].f64, |
| _src[0][6].f64, |
| _src[0][7].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float64_vec src1 = { |
| _src[1][0].f64, |
| _src[1][1].f64, |
| _src[1][2].f64, |
| _src[1][3].f64, |
| _src[1][4].f64, |
| _src[1][5].f64, |
| _src[1][6].f64, |
| _src[1][7].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b16any_inequal16(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| const struct int1_vec src0 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][1].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][2].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][3].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][4].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][5].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][6].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][7].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][8].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][9].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][10].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][11].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][12].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][13].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][14].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][15].b, |
| }; |
| |
| const struct int1_vec src1 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][1].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][2].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][3].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][4].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][5].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][6].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][7].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][8].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][9].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][10].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][11].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][12].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][13].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][14].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][15].b, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p != src1.p) || (src0.o != src1.o) || (src0.n != src1.n) || (src0.m != src1.m) || (src0.l != src1.l) || (src0.k != src1.k) || (src0.j != src1.j) || (src0.i != src1.i) || (src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| const struct int8_vec src0 = { |
| _src[0][0].i8, |
| _src[0][1].i8, |
| _src[0][2].i8, |
| _src[0][3].i8, |
| _src[0][4].i8, |
| _src[0][5].i8, |
| _src[0][6].i8, |
| _src[0][7].i8, |
| _src[0][8].i8, |
| _src[0][9].i8, |
| _src[0][10].i8, |
| _src[0][11].i8, |
| _src[0][12].i8, |
| _src[0][13].i8, |
| _src[0][14].i8, |
| _src[0][15].i8, |
| }; |
| |
| const struct int8_vec src1 = { |
| _src[1][0].i8, |
| _src[1][1].i8, |
| _src[1][2].i8, |
| _src[1][3].i8, |
| _src[1][4].i8, |
| _src[1][5].i8, |
| _src[1][6].i8, |
| _src[1][7].i8, |
| _src[1][8].i8, |
| _src[1][9].i8, |
| _src[1][10].i8, |
| _src[1][11].i8, |
| _src[1][12].i8, |
| _src[1][13].i8, |
| _src[1][14].i8, |
| _src[1][15].i8, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p != src1.p) || (src0.o != src1.o) || (src0.n != src1.n) || (src0.m != src1.m) || (src0.l != src1.l) || (src0.k != src1.k) || (src0.j != src1.j) || (src0.i != src1.i) || (src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| const struct int16_vec src0 = { |
| _src[0][0].i16, |
| _src[0][1].i16, |
| _src[0][2].i16, |
| _src[0][3].i16, |
| _src[0][4].i16, |
| _src[0][5].i16, |
| _src[0][6].i16, |
| _src[0][7].i16, |
| _src[0][8].i16, |
| _src[0][9].i16, |
| _src[0][10].i16, |
| _src[0][11].i16, |
| _src[0][12].i16, |
| _src[0][13].i16, |
| _src[0][14].i16, |
| _src[0][15].i16, |
| }; |
| |
| const struct int16_vec src1 = { |
| _src[1][0].i16, |
| _src[1][1].i16, |
| _src[1][2].i16, |
| _src[1][3].i16, |
| _src[1][4].i16, |
| _src[1][5].i16, |
| _src[1][6].i16, |
| _src[1][7].i16, |
| _src[1][8].i16, |
| _src[1][9].i16, |
| _src[1][10].i16, |
| _src[1][11].i16, |
| _src[1][12].i16, |
| _src[1][13].i16, |
| _src[1][14].i16, |
| _src[1][15].i16, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p != src1.p) || (src0.o != src1.o) || (src0.n != src1.n) || (src0.m != src1.m) || (src0.l != src1.l) || (src0.k != src1.k) || (src0.j != src1.j) || (src0.i != src1.i) || (src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct int32_vec src0 = { |
| _src[0][0].i32, |
| _src[0][1].i32, |
| _src[0][2].i32, |
| _src[0][3].i32, |
| _src[0][4].i32, |
| _src[0][5].i32, |
| _src[0][6].i32, |
| _src[0][7].i32, |
| _src[0][8].i32, |
| _src[0][9].i32, |
| _src[0][10].i32, |
| _src[0][11].i32, |
| _src[0][12].i32, |
| _src[0][13].i32, |
| _src[0][14].i32, |
| _src[0][15].i32, |
| }; |
| |
| const struct int32_vec src1 = { |
| _src[1][0].i32, |
| _src[1][1].i32, |
| _src[1][2].i32, |
| _src[1][3].i32, |
| _src[1][4].i32, |
| _src[1][5].i32, |
| _src[1][6].i32, |
| _src[1][7].i32, |
| _src[1][8].i32, |
| _src[1][9].i32, |
| _src[1][10].i32, |
| _src[1][11].i32, |
| _src[1][12].i32, |
| _src[1][13].i32, |
| _src[1][14].i32, |
| _src[1][15].i32, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p != src1.p) || (src0.o != src1.o) || (src0.n != src1.n) || (src0.m != src1.m) || (src0.l != src1.l) || (src0.k != src1.k) || (src0.j != src1.j) || (src0.i != src1.i) || (src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct int64_vec src0 = { |
| _src[0][0].i64, |
| _src[0][1].i64, |
| _src[0][2].i64, |
| _src[0][3].i64, |
| _src[0][4].i64, |
| _src[0][5].i64, |
| _src[0][6].i64, |
| _src[0][7].i64, |
| _src[0][8].i64, |
| _src[0][9].i64, |
| _src[0][10].i64, |
| _src[0][11].i64, |
| _src[0][12].i64, |
| _src[0][13].i64, |
| _src[0][14].i64, |
| _src[0][15].i64, |
| }; |
| |
| const struct int64_vec src1 = { |
| _src[1][0].i64, |
| _src[1][1].i64, |
| _src[1][2].i64, |
| _src[1][3].i64, |
| _src[1][4].i64, |
| _src[1][5].i64, |
| _src[1][6].i64, |
| _src[1][7].i64, |
| _src[1][8].i64, |
| _src[1][9].i64, |
| _src[1][10].i64, |
| _src[1][11].i64, |
| _src[1][12].i64, |
| _src[1][13].i64, |
| _src[1][14].i64, |
| _src[1][15].i64, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p != src1.p) || (src0.o != src1.o) || (src0.n != src1.n) || (src0.m != src1.m) || (src0.l != src1.l) || (src0.k != src1.k) || (src0.j != src1.j) || (src0.i != src1.i) || (src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b16any_inequal2(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| const struct int1_vec src0 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][1].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int1_vec src1 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][1].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| const struct int8_vec src0 = { |
| _src[0][0].i8, |
| _src[0][1].i8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int8_vec src1 = { |
| _src[1][0].i8, |
| _src[1][1].i8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| const struct int16_vec src0 = { |
| _src[0][0].i16, |
| _src[0][1].i16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int16_vec src1 = { |
| _src[1][0].i16, |
| _src[1][1].i16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct int32_vec src0 = { |
| _src[0][0].i32, |
| _src[0][1].i32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int32_vec src1 = { |
| _src[1][0].i32, |
| _src[1][1].i32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct int64_vec src0 = { |
| _src[0][0].i64, |
| _src[0][1].i64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int64_vec src1 = { |
| _src[1][0].i64, |
| _src[1][1].i64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b16any_inequal3(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| const struct int1_vec src0 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][1].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][2].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int1_vec src1 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][1].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][2].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| const struct int8_vec src0 = { |
| _src[0][0].i8, |
| _src[0][1].i8, |
| _src[0][2].i8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int8_vec src1 = { |
| _src[1][0].i8, |
| _src[1][1].i8, |
| _src[1][2].i8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| const struct int16_vec src0 = { |
| _src[0][0].i16, |
| _src[0][1].i16, |
| _src[0][2].i16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int16_vec src1 = { |
| _src[1][0].i16, |
| _src[1][1].i16, |
| _src[1][2].i16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct int32_vec src0 = { |
| _src[0][0].i32, |
| _src[0][1].i32, |
| _src[0][2].i32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int32_vec src1 = { |
| _src[1][0].i32, |
| _src[1][1].i32, |
| _src[1][2].i32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct int64_vec src0 = { |
| _src[0][0].i64, |
| _src[0][1].i64, |
| _src[0][2].i64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int64_vec src1 = { |
| _src[1][0].i64, |
| _src[1][1].i64, |
| _src[1][2].i64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b16any_inequal4(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| const struct int1_vec src0 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][1].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][2].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][3].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int1_vec src1 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][1].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][2].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][3].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| const struct int8_vec src0 = { |
| _src[0][0].i8, |
| _src[0][1].i8, |
| _src[0][2].i8, |
| _src[0][3].i8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int8_vec src1 = { |
| _src[1][0].i8, |
| _src[1][1].i8, |
| _src[1][2].i8, |
| _src[1][3].i8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| const struct int16_vec src0 = { |
| _src[0][0].i16, |
| _src[0][1].i16, |
| _src[0][2].i16, |
| _src[0][3].i16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int16_vec src1 = { |
| _src[1][0].i16, |
| _src[1][1].i16, |
| _src[1][2].i16, |
| _src[1][3].i16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct int32_vec src0 = { |
| _src[0][0].i32, |
| _src[0][1].i32, |
| _src[0][2].i32, |
| _src[0][3].i32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int32_vec src1 = { |
| _src[1][0].i32, |
| _src[1][1].i32, |
| _src[1][2].i32, |
| _src[1][3].i32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct int64_vec src0 = { |
| _src[0][0].i64, |
| _src[0][1].i64, |
| _src[0][2].i64, |
| _src[0][3].i64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int64_vec src1 = { |
| _src[1][0].i64, |
| _src[1][1].i64, |
| _src[1][2].i64, |
| _src[1][3].i64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b16any_inequal8(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| const struct int1_vec src0 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][1].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][2].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][3].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][4].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][5].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][6].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][7].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int1_vec src1 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][1].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][2].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][3].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][4].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][5].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][6].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][7].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| const struct int8_vec src0 = { |
| _src[0][0].i8, |
| _src[0][1].i8, |
| _src[0][2].i8, |
| _src[0][3].i8, |
| _src[0][4].i8, |
| _src[0][5].i8, |
| _src[0][6].i8, |
| _src[0][7].i8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int8_vec src1 = { |
| _src[1][0].i8, |
| _src[1][1].i8, |
| _src[1][2].i8, |
| _src[1][3].i8, |
| _src[1][4].i8, |
| _src[1][5].i8, |
| _src[1][6].i8, |
| _src[1][7].i8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| const struct int16_vec src0 = { |
| _src[0][0].i16, |
| _src[0][1].i16, |
| _src[0][2].i16, |
| _src[0][3].i16, |
| _src[0][4].i16, |
| _src[0][5].i16, |
| _src[0][6].i16, |
| _src[0][7].i16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int16_vec src1 = { |
| _src[1][0].i16, |
| _src[1][1].i16, |
| _src[1][2].i16, |
| _src[1][3].i16, |
| _src[1][4].i16, |
| _src[1][5].i16, |
| _src[1][6].i16, |
| _src[1][7].i16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct int32_vec src0 = { |
| _src[0][0].i32, |
| _src[0][1].i32, |
| _src[0][2].i32, |
| _src[0][3].i32, |
| _src[0][4].i32, |
| _src[0][5].i32, |
| _src[0][6].i32, |
| _src[0][7].i32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int32_vec src1 = { |
| _src[1][0].i32, |
| _src[1][1].i32, |
| _src[1][2].i32, |
| _src[1][3].i32, |
| _src[1][4].i32, |
| _src[1][5].i32, |
| _src[1][6].i32, |
| _src[1][7].i32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct int64_vec src0 = { |
| _src[0][0].i64, |
| _src[0][1].i64, |
| _src[0][2].i64, |
| _src[0][3].i64, |
| _src[0][4].i64, |
| _src[0][5].i64, |
| _src[0][6].i64, |
| _src[0][7].i64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int64_vec src1 = { |
| _src[1][0].i64, |
| _src[1][1].i64, |
| _src[1][2].i64, |
| _src[1][3].i64, |
| _src[1][4].i64, |
| _src[1][5].i64, |
| _src[1][6].i64, |
| _src[1][7].i64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i16 = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b16csel(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool16_t src0 = |
| _src[0][_i].i16; |
| const uint1_t src1 = |
| _src[1][_i].b; |
| const uint1_t src2 = |
| _src[2][_i].b; |
| |
| uint1_t dst = src0 ? src1 : src2; |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[_i].b = dst & 1; |
| |
| } |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool16_t src0 = |
| _src[0][_i].i16; |
| const uint8_t src1 = |
| _src[1][_i].u8; |
| const uint8_t src2 = |
| _src[2][_i].u8; |
| |
| uint8_t dst = src0 ? src1 : src2; |
| |
| _dst_val[_i].u8 = dst; |
| |
| } |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool16_t src0 = |
| _src[0][_i].i16; |
| const uint16_t src1 = |
| _src[1][_i].u16; |
| const uint16_t src2 = |
| _src[2][_i].u16; |
| |
| uint16_t dst = src0 ? src1 : src2; |
| |
| _dst_val[_i].u16 = dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool16_t src0 = |
| _src[0][_i].i16; |
| const uint32_t src1 = |
| _src[1][_i].u32; |
| const uint32_t src2 = |
| _src[2][_i].u32; |
| |
| uint32_t dst = src0 ? src1 : src2; |
| |
| _dst_val[_i].u32 = dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool16_t src0 = |
| _src[0][_i].i16; |
| const uint64_t src1 = |
| _src[1][_i].u64; |
| const uint64_t src2 = |
| _src[2][_i].u64; |
| |
| uint64_t dst = src0 ? src1 : src2; |
| |
| _dst_val[_i].u64 = dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b2b1(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool1_t src0 = |
| _src[0][_i].b; |
| |
| bool1_t dst = src0 != 0; |
| |
| _dst_val[_i].b = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool8_t src0 = |
| _src[0][_i].i8; |
| |
| bool1_t dst = src0 != 0; |
| |
| _dst_val[_i].b = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool16_t src0 = |
| _src[0][_i].i16; |
| |
| bool1_t dst = src0 != 0; |
| |
| _dst_val[_i].b = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool32_t src0 = |
| _src[0][_i].i32; |
| |
| bool1_t dst = src0 != 0; |
| |
| _dst_val[_i].b = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b2b16(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool1_t src0 = |
| _src[0][_i].b; |
| |
| bool16_t dst = src0 != 0; |
| |
| _dst_val[_i].i16 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool8_t src0 = |
| _src[0][_i].i8; |
| |
| bool16_t dst = src0 != 0; |
| |
| _dst_val[_i].i16 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool16_t src0 = |
| _src[0][_i].i16; |
| |
| bool16_t dst = src0 != 0; |
| |
| _dst_val[_i].i16 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool32_t src0 = |
| _src[0][_i].i32; |
| |
| bool16_t dst = src0 != 0; |
| |
| _dst_val[_i].i16 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b2b32(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool1_t src0 = |
| _src[0][_i].b; |
| |
| bool32_t dst = src0 != 0; |
| |
| _dst_val[_i].i32 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool8_t src0 = |
| _src[0][_i].i8; |
| |
| bool32_t dst = src0 != 0; |
| |
| _dst_val[_i].i32 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool16_t src0 = |
| _src[0][_i].i16; |
| |
| bool32_t dst = src0 != 0; |
| |
| _dst_val[_i].i32 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool32_t src0 = |
| _src[0][_i].i32; |
| |
| bool32_t dst = src0 != 0; |
| |
| _dst_val[_i].i32 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b2b8(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool1_t src0 = |
| _src[0][_i].b; |
| |
| bool8_t dst = src0 != 0; |
| |
| _dst_val[_i].i8 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool8_t src0 = |
| _src[0][_i].i8; |
| |
| bool8_t dst = src0 != 0; |
| |
| _dst_val[_i].i8 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool16_t src0 = |
| _src[0][_i].i16; |
| |
| bool8_t dst = src0 != 0; |
| |
| _dst_val[_i].i8 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool32_t src0 = |
| _src[0][_i].i32; |
| |
| bool8_t dst = src0 != 0; |
| |
| _dst_val[_i].i8 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b2f16(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool1_t src0 = |
| _src[0][_i].b; |
| |
| float16_t dst = src0; |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); |
| } else { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 16); |
| } |
| } |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool8_t src0 = |
| _src[0][_i].i8; |
| |
| float16_t dst = src0; |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); |
| } else { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 16); |
| } |
| } |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool16_t src0 = |
| _src[0][_i].i16; |
| |
| float16_t dst = src0; |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); |
| } else { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 16); |
| } |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool32_t src0 = |
| _src[0][_i].i32; |
| |
| float16_t dst = src0; |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); |
| } else { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 16); |
| } |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b2f32(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool1_t src0 = |
| _src[0][_i].b; |
| |
| float32_t dst = src0; |
| |
| _dst_val[_i].f32 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 32); |
| } |
| } |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool8_t src0 = |
| _src[0][_i].i8; |
| |
| float32_t dst = src0; |
| |
| _dst_val[_i].f32 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 32); |
| } |
| } |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool16_t src0 = |
| _src[0][_i].i16; |
| |
| float32_t dst = src0; |
| |
| _dst_val[_i].f32 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 32); |
| } |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool32_t src0 = |
| _src[0][_i].i32; |
| |
| float32_t dst = src0; |
| |
| _dst_val[_i].f32 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 32); |
| } |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b2f64(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool1_t src0 = |
| _src[0][_i].b; |
| |
| float64_t dst = src0; |
| |
| _dst_val[_i].f64 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 64); |
| } |
| } |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool8_t src0 = |
| _src[0][_i].i8; |
| |
| float64_t dst = src0; |
| |
| _dst_val[_i].f64 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 64); |
| } |
| } |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool16_t src0 = |
| _src[0][_i].i16; |
| |
| float64_t dst = src0; |
| |
| _dst_val[_i].f64 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 64); |
| } |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool32_t src0 = |
| _src[0][_i].i32; |
| |
| float64_t dst = src0; |
| |
| _dst_val[_i].f64 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 64); |
| } |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b2i1(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool1_t src0 = |
| _src[0][_i].b; |
| |
| int1_t dst = src0; |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[_i].b = dst & 1; |
| |
| } |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool8_t src0 = |
| _src[0][_i].i8; |
| |
| int1_t dst = src0; |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[_i].b = dst & 1; |
| |
| } |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool16_t src0 = |
| _src[0][_i].i16; |
| |
| int1_t dst = src0; |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[_i].b = dst & 1; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool32_t src0 = |
| _src[0][_i].i32; |
| |
| int1_t dst = src0; |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[_i].b = dst & 1; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b2i16(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool1_t src0 = |
| _src[0][_i].b; |
| |
| int16_t dst = src0; |
| |
| _dst_val[_i].i16 = dst; |
| |
| } |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool8_t src0 = |
| _src[0][_i].i8; |
| |
| int16_t dst = src0; |
| |
| _dst_val[_i].i16 = dst; |
| |
| } |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool16_t src0 = |
| _src[0][_i].i16; |
| |
| int16_t dst = src0; |
| |
| _dst_val[_i].i16 = dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool32_t src0 = |
| _src[0][_i].i32; |
| |
| int16_t dst = src0; |
| |
| _dst_val[_i].i16 = dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b2i32(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool1_t src0 = |
| _src[0][_i].b; |
| |
| int32_t dst = src0; |
| |
| _dst_val[_i].i32 = dst; |
| |
| } |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool8_t src0 = |
| _src[0][_i].i8; |
| |
| int32_t dst = src0; |
| |
| _dst_val[_i].i32 = dst; |
| |
| } |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool16_t src0 = |
| _src[0][_i].i16; |
| |
| int32_t dst = src0; |
| |
| _dst_val[_i].i32 = dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool32_t src0 = |
| _src[0][_i].i32; |
| |
| int32_t dst = src0; |
| |
| _dst_val[_i].i32 = dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b2i64(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool1_t src0 = |
| _src[0][_i].b; |
| |
| int64_t dst = src0; |
| |
| _dst_val[_i].i64 = dst; |
| |
| } |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool8_t src0 = |
| _src[0][_i].i8; |
| |
| int64_t dst = src0; |
| |
| _dst_val[_i].i64 = dst; |
| |
| } |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool16_t src0 = |
| _src[0][_i].i16; |
| |
| int64_t dst = src0; |
| |
| _dst_val[_i].i64 = dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool32_t src0 = |
| _src[0][_i].i32; |
| |
| int64_t dst = src0; |
| |
| _dst_val[_i].i64 = dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b2i8(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool1_t src0 = |
| _src[0][_i].b; |
| |
| int8_t dst = src0; |
| |
| _dst_val[_i].i8 = dst; |
| |
| } |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool8_t src0 = |
| _src[0][_i].i8; |
| |
| int8_t dst = src0; |
| |
| _dst_val[_i].i8 = dst; |
| |
| } |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool16_t src0 = |
| _src[0][_i].i16; |
| |
| int8_t dst = src0; |
| |
| _dst_val[_i].i8 = dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool32_t src0 = |
| _src[0][_i].i32; |
| |
| int8_t dst = src0; |
| |
| _dst_val[_i].i8 = dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b32all_fequal16(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| const struct float16_vec src0 = { |
| _mesa_half_to_float(_src[0][0].u16), |
| _mesa_half_to_float(_src[0][1].u16), |
| _mesa_half_to_float(_src[0][2].u16), |
| _mesa_half_to_float(_src[0][3].u16), |
| _mesa_half_to_float(_src[0][4].u16), |
| _mesa_half_to_float(_src[0][5].u16), |
| _mesa_half_to_float(_src[0][6].u16), |
| _mesa_half_to_float(_src[0][7].u16), |
| _mesa_half_to_float(_src[0][8].u16), |
| _mesa_half_to_float(_src[0][9].u16), |
| _mesa_half_to_float(_src[0][10].u16), |
| _mesa_half_to_float(_src[0][11].u16), |
| _mesa_half_to_float(_src[0][12].u16), |
| _mesa_half_to_float(_src[0][13].u16), |
| _mesa_half_to_float(_src[0][14].u16), |
| _mesa_half_to_float(_src[0][15].u16), |
| }; |
| |
| const struct float16_vec src1 = { |
| _mesa_half_to_float(_src[1][0].u16), |
| _mesa_half_to_float(_src[1][1].u16), |
| _mesa_half_to_float(_src[1][2].u16), |
| _mesa_half_to_float(_src[1][3].u16), |
| _mesa_half_to_float(_src[1][4].u16), |
| _mesa_half_to_float(_src[1][5].u16), |
| _mesa_half_to_float(_src[1][6].u16), |
| _mesa_half_to_float(_src[1][7].u16), |
| _mesa_half_to_float(_src[1][8].u16), |
| _mesa_half_to_float(_src[1][9].u16), |
| _mesa_half_to_float(_src[1][10].u16), |
| _mesa_half_to_float(_src[1][11].u16), |
| _mesa_half_to_float(_src[1][12].u16), |
| _mesa_half_to_float(_src[1][13].u16), |
| _mesa_half_to_float(_src[1][14].u16), |
| _mesa_half_to_float(_src[1][15].u16), |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p == src1.p) && (src0.o == src1.o) && (src0.n == src1.n) && (src0.m == src1.m) && (src0.l == src1.l) && (src0.k == src1.k) && (src0.j == src1.j) && (src0.i == src1.i) && (src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| _src[0][2].f32, |
| _src[0][3].f32, |
| _src[0][4].f32, |
| _src[0][5].f32, |
| _src[0][6].f32, |
| _src[0][7].f32, |
| _src[0][8].f32, |
| _src[0][9].f32, |
| _src[0][10].f32, |
| _src[0][11].f32, |
| _src[0][12].f32, |
| _src[0][13].f32, |
| _src[0][14].f32, |
| _src[0][15].f32, |
| }; |
| |
| const struct float32_vec src1 = { |
| _src[1][0].f32, |
| _src[1][1].f32, |
| _src[1][2].f32, |
| _src[1][3].f32, |
| _src[1][4].f32, |
| _src[1][5].f32, |
| _src[1][6].f32, |
| _src[1][7].f32, |
| _src[1][8].f32, |
| _src[1][9].f32, |
| _src[1][10].f32, |
| _src[1][11].f32, |
| _src[1][12].f32, |
| _src[1][13].f32, |
| _src[1][14].f32, |
| _src[1][15].f32, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p == src1.p) && (src0.o == src1.o) && (src0.n == src1.n) && (src0.m == src1.m) && (src0.l == src1.l) && (src0.k == src1.k) && (src0.j == src1.j) && (src0.i == src1.i) && (src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct float64_vec src0 = { |
| _src[0][0].f64, |
| _src[0][1].f64, |
| _src[0][2].f64, |
| _src[0][3].f64, |
| _src[0][4].f64, |
| _src[0][5].f64, |
| _src[0][6].f64, |
| _src[0][7].f64, |
| _src[0][8].f64, |
| _src[0][9].f64, |
| _src[0][10].f64, |
| _src[0][11].f64, |
| _src[0][12].f64, |
| _src[0][13].f64, |
| _src[0][14].f64, |
| _src[0][15].f64, |
| }; |
| |
| const struct float64_vec src1 = { |
| _src[1][0].f64, |
| _src[1][1].f64, |
| _src[1][2].f64, |
| _src[1][3].f64, |
| _src[1][4].f64, |
| _src[1][5].f64, |
| _src[1][6].f64, |
| _src[1][7].f64, |
| _src[1][8].f64, |
| _src[1][9].f64, |
| _src[1][10].f64, |
| _src[1][11].f64, |
| _src[1][12].f64, |
| _src[1][13].f64, |
| _src[1][14].f64, |
| _src[1][15].f64, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p == src1.p) && (src0.o == src1.o) && (src0.n == src1.n) && (src0.m == src1.m) && (src0.l == src1.l) && (src0.k == src1.k) && (src0.j == src1.j) && (src0.i == src1.i) && (src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b32all_fequal2(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| const struct float16_vec src0 = { |
| _mesa_half_to_float(_src[0][0].u16), |
| _mesa_half_to_float(_src[0][1].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float16_vec src1 = { |
| _mesa_half_to_float(_src[1][0].u16), |
| _mesa_half_to_float(_src[1][1].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float32_vec src1 = { |
| _src[1][0].f32, |
| _src[1][1].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct float64_vec src0 = { |
| _src[0][0].f64, |
| _src[0][1].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float64_vec src1 = { |
| _src[1][0].f64, |
| _src[1][1].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b32all_fequal3(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| const struct float16_vec src0 = { |
| _mesa_half_to_float(_src[0][0].u16), |
| _mesa_half_to_float(_src[0][1].u16), |
| _mesa_half_to_float(_src[0][2].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float16_vec src1 = { |
| _mesa_half_to_float(_src[1][0].u16), |
| _mesa_half_to_float(_src[1][1].u16), |
| _mesa_half_to_float(_src[1][2].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| _src[0][2].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float32_vec src1 = { |
| _src[1][0].f32, |
| _src[1][1].f32, |
| _src[1][2].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct float64_vec src0 = { |
| _src[0][0].f64, |
| _src[0][1].f64, |
| _src[0][2].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float64_vec src1 = { |
| _src[1][0].f64, |
| _src[1][1].f64, |
| _src[1][2].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b32all_fequal4(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| const struct float16_vec src0 = { |
| _mesa_half_to_float(_src[0][0].u16), |
| _mesa_half_to_float(_src[0][1].u16), |
| _mesa_half_to_float(_src[0][2].u16), |
| _mesa_half_to_float(_src[0][3].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float16_vec src1 = { |
| _mesa_half_to_float(_src[1][0].u16), |
| _mesa_half_to_float(_src[1][1].u16), |
| _mesa_half_to_float(_src[1][2].u16), |
| _mesa_half_to_float(_src[1][3].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| _src[0][2].f32, |
| _src[0][3].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float32_vec src1 = { |
| _src[1][0].f32, |
| _src[1][1].f32, |
| _src[1][2].f32, |
| _src[1][3].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct float64_vec src0 = { |
| _src[0][0].f64, |
| _src[0][1].f64, |
| _src[0][2].f64, |
| _src[0][3].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float64_vec src1 = { |
| _src[1][0].f64, |
| _src[1][1].f64, |
| _src[1][2].f64, |
| _src[1][3].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b32all_fequal8(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| const struct float16_vec src0 = { |
| _mesa_half_to_float(_src[0][0].u16), |
| _mesa_half_to_float(_src[0][1].u16), |
| _mesa_half_to_float(_src[0][2].u16), |
| _mesa_half_to_float(_src[0][3].u16), |
| _mesa_half_to_float(_src[0][4].u16), |
| _mesa_half_to_float(_src[0][5].u16), |
| _mesa_half_to_float(_src[0][6].u16), |
| _mesa_half_to_float(_src[0][7].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float16_vec src1 = { |
| _mesa_half_to_float(_src[1][0].u16), |
| _mesa_half_to_float(_src[1][1].u16), |
| _mesa_half_to_float(_src[1][2].u16), |
| _mesa_half_to_float(_src[1][3].u16), |
| _mesa_half_to_float(_src[1][4].u16), |
| _mesa_half_to_float(_src[1][5].u16), |
| _mesa_half_to_float(_src[1][6].u16), |
| _mesa_half_to_float(_src[1][7].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| _src[0][2].f32, |
| _src[0][3].f32, |
| _src[0][4].f32, |
| _src[0][5].f32, |
| _src[0][6].f32, |
| _src[0][7].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float32_vec src1 = { |
| _src[1][0].f32, |
| _src[1][1].f32, |
| _src[1][2].f32, |
| _src[1][3].f32, |
| _src[1][4].f32, |
| _src[1][5].f32, |
| _src[1][6].f32, |
| _src[1][7].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct float64_vec src0 = { |
| _src[0][0].f64, |
| _src[0][1].f64, |
| _src[0][2].f64, |
| _src[0][3].f64, |
| _src[0][4].f64, |
| _src[0][5].f64, |
| _src[0][6].f64, |
| _src[0][7].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float64_vec src1 = { |
| _src[1][0].f64, |
| _src[1][1].f64, |
| _src[1][2].f64, |
| _src[1][3].f64, |
| _src[1][4].f64, |
| _src[1][5].f64, |
| _src[1][6].f64, |
| _src[1][7].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b32all_iequal16(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| const struct int1_vec src0 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][1].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][2].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][3].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][4].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][5].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][6].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][7].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][8].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][9].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][10].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][11].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][12].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][13].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][14].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][15].b, |
| }; |
| |
| const struct int1_vec src1 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][1].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][2].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][3].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][4].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][5].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][6].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][7].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][8].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][9].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][10].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][11].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][12].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][13].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][14].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][15].b, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p == src1.p) && (src0.o == src1.o) && (src0.n == src1.n) && (src0.m == src1.m) && (src0.l == src1.l) && (src0.k == src1.k) && (src0.j == src1.j) && (src0.i == src1.i) && (src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| const struct int8_vec src0 = { |
| _src[0][0].i8, |
| _src[0][1].i8, |
| _src[0][2].i8, |
| _src[0][3].i8, |
| _src[0][4].i8, |
| _src[0][5].i8, |
| _src[0][6].i8, |
| _src[0][7].i8, |
| _src[0][8].i8, |
| _src[0][9].i8, |
| _src[0][10].i8, |
| _src[0][11].i8, |
| _src[0][12].i8, |
| _src[0][13].i8, |
| _src[0][14].i8, |
| _src[0][15].i8, |
| }; |
| |
| const struct int8_vec src1 = { |
| _src[1][0].i8, |
| _src[1][1].i8, |
| _src[1][2].i8, |
| _src[1][3].i8, |
| _src[1][4].i8, |
| _src[1][5].i8, |
| _src[1][6].i8, |
| _src[1][7].i8, |
| _src[1][8].i8, |
| _src[1][9].i8, |
| _src[1][10].i8, |
| _src[1][11].i8, |
| _src[1][12].i8, |
| _src[1][13].i8, |
| _src[1][14].i8, |
| _src[1][15].i8, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p == src1.p) && (src0.o == src1.o) && (src0.n == src1.n) && (src0.m == src1.m) && (src0.l == src1.l) && (src0.k == src1.k) && (src0.j == src1.j) && (src0.i == src1.i) && (src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| const struct int16_vec src0 = { |
| _src[0][0].i16, |
| _src[0][1].i16, |
| _src[0][2].i16, |
| _src[0][3].i16, |
| _src[0][4].i16, |
| _src[0][5].i16, |
| _src[0][6].i16, |
| _src[0][7].i16, |
| _src[0][8].i16, |
| _src[0][9].i16, |
| _src[0][10].i16, |
| _src[0][11].i16, |
| _src[0][12].i16, |
| _src[0][13].i16, |
| _src[0][14].i16, |
| _src[0][15].i16, |
| }; |
| |
| const struct int16_vec src1 = { |
| _src[1][0].i16, |
| _src[1][1].i16, |
| _src[1][2].i16, |
| _src[1][3].i16, |
| _src[1][4].i16, |
| _src[1][5].i16, |
| _src[1][6].i16, |
| _src[1][7].i16, |
| _src[1][8].i16, |
| _src[1][9].i16, |
| _src[1][10].i16, |
| _src[1][11].i16, |
| _src[1][12].i16, |
| _src[1][13].i16, |
| _src[1][14].i16, |
| _src[1][15].i16, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p == src1.p) && (src0.o == src1.o) && (src0.n == src1.n) && (src0.m == src1.m) && (src0.l == src1.l) && (src0.k == src1.k) && (src0.j == src1.j) && (src0.i == src1.i) && (src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct int32_vec src0 = { |
| _src[0][0].i32, |
| _src[0][1].i32, |
| _src[0][2].i32, |
| _src[0][3].i32, |
| _src[0][4].i32, |
| _src[0][5].i32, |
| _src[0][6].i32, |
| _src[0][7].i32, |
| _src[0][8].i32, |
| _src[0][9].i32, |
| _src[0][10].i32, |
| _src[0][11].i32, |
| _src[0][12].i32, |
| _src[0][13].i32, |
| _src[0][14].i32, |
| _src[0][15].i32, |
| }; |
| |
| const struct int32_vec src1 = { |
| _src[1][0].i32, |
| _src[1][1].i32, |
| _src[1][2].i32, |
| _src[1][3].i32, |
| _src[1][4].i32, |
| _src[1][5].i32, |
| _src[1][6].i32, |
| _src[1][7].i32, |
| _src[1][8].i32, |
| _src[1][9].i32, |
| _src[1][10].i32, |
| _src[1][11].i32, |
| _src[1][12].i32, |
| _src[1][13].i32, |
| _src[1][14].i32, |
| _src[1][15].i32, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p == src1.p) && (src0.o == src1.o) && (src0.n == src1.n) && (src0.m == src1.m) && (src0.l == src1.l) && (src0.k == src1.k) && (src0.j == src1.j) && (src0.i == src1.i) && (src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct int64_vec src0 = { |
| _src[0][0].i64, |
| _src[0][1].i64, |
| _src[0][2].i64, |
| _src[0][3].i64, |
| _src[0][4].i64, |
| _src[0][5].i64, |
| _src[0][6].i64, |
| _src[0][7].i64, |
| _src[0][8].i64, |
| _src[0][9].i64, |
| _src[0][10].i64, |
| _src[0][11].i64, |
| _src[0][12].i64, |
| _src[0][13].i64, |
| _src[0][14].i64, |
| _src[0][15].i64, |
| }; |
| |
| const struct int64_vec src1 = { |
| _src[1][0].i64, |
| _src[1][1].i64, |
| _src[1][2].i64, |
| _src[1][3].i64, |
| _src[1][4].i64, |
| _src[1][5].i64, |
| _src[1][6].i64, |
| _src[1][7].i64, |
| _src[1][8].i64, |
| _src[1][9].i64, |
| _src[1][10].i64, |
| _src[1][11].i64, |
| _src[1][12].i64, |
| _src[1][13].i64, |
| _src[1][14].i64, |
| _src[1][15].i64, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p == src1.p) && (src0.o == src1.o) && (src0.n == src1.n) && (src0.m == src1.m) && (src0.l == src1.l) && (src0.k == src1.k) && (src0.j == src1.j) && (src0.i == src1.i) && (src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b32all_iequal2(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| const struct int1_vec src0 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][1].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int1_vec src1 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][1].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| const struct int8_vec src0 = { |
| _src[0][0].i8, |
| _src[0][1].i8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int8_vec src1 = { |
| _src[1][0].i8, |
| _src[1][1].i8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| const struct int16_vec src0 = { |
| _src[0][0].i16, |
| _src[0][1].i16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int16_vec src1 = { |
| _src[1][0].i16, |
| _src[1][1].i16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct int32_vec src0 = { |
| _src[0][0].i32, |
| _src[0][1].i32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int32_vec src1 = { |
| _src[1][0].i32, |
| _src[1][1].i32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct int64_vec src0 = { |
| _src[0][0].i64, |
| _src[0][1].i64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int64_vec src1 = { |
| _src[1][0].i64, |
| _src[1][1].i64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b32all_iequal3(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| const struct int1_vec src0 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][1].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][2].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int1_vec src1 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][1].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][2].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| const struct int8_vec src0 = { |
| _src[0][0].i8, |
| _src[0][1].i8, |
| _src[0][2].i8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int8_vec src1 = { |
| _src[1][0].i8, |
| _src[1][1].i8, |
| _src[1][2].i8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| const struct int16_vec src0 = { |
| _src[0][0].i16, |
| _src[0][1].i16, |
| _src[0][2].i16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int16_vec src1 = { |
| _src[1][0].i16, |
| _src[1][1].i16, |
| _src[1][2].i16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct int32_vec src0 = { |
| _src[0][0].i32, |
| _src[0][1].i32, |
| _src[0][2].i32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int32_vec src1 = { |
| _src[1][0].i32, |
| _src[1][1].i32, |
| _src[1][2].i32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct int64_vec src0 = { |
| _src[0][0].i64, |
| _src[0][1].i64, |
| _src[0][2].i64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int64_vec src1 = { |
| _src[1][0].i64, |
| _src[1][1].i64, |
| _src[1][2].i64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b32all_iequal4(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| const struct int1_vec src0 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][1].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][2].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][3].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int1_vec src1 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][1].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][2].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][3].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| const struct int8_vec src0 = { |
| _src[0][0].i8, |
| _src[0][1].i8, |
| _src[0][2].i8, |
| _src[0][3].i8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int8_vec src1 = { |
| _src[1][0].i8, |
| _src[1][1].i8, |
| _src[1][2].i8, |
| _src[1][3].i8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| const struct int16_vec src0 = { |
| _src[0][0].i16, |
| _src[0][1].i16, |
| _src[0][2].i16, |
| _src[0][3].i16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int16_vec src1 = { |
| _src[1][0].i16, |
| _src[1][1].i16, |
| _src[1][2].i16, |
| _src[1][3].i16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct int32_vec src0 = { |
| _src[0][0].i32, |
| _src[0][1].i32, |
| _src[0][2].i32, |
| _src[0][3].i32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int32_vec src1 = { |
| _src[1][0].i32, |
| _src[1][1].i32, |
| _src[1][2].i32, |
| _src[1][3].i32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct int64_vec src0 = { |
| _src[0][0].i64, |
| _src[0][1].i64, |
| _src[0][2].i64, |
| _src[0][3].i64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int64_vec src1 = { |
| _src[1][0].i64, |
| _src[1][1].i64, |
| _src[1][2].i64, |
| _src[1][3].i64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b32all_iequal8(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| const struct int1_vec src0 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][1].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][2].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][3].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][4].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][5].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][6].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][7].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int1_vec src1 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][1].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][2].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][3].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][4].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][5].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][6].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][7].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| const struct int8_vec src0 = { |
| _src[0][0].i8, |
| _src[0][1].i8, |
| _src[0][2].i8, |
| _src[0][3].i8, |
| _src[0][4].i8, |
| _src[0][5].i8, |
| _src[0][6].i8, |
| _src[0][7].i8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int8_vec src1 = { |
| _src[1][0].i8, |
| _src[1][1].i8, |
| _src[1][2].i8, |
| _src[1][3].i8, |
| _src[1][4].i8, |
| _src[1][5].i8, |
| _src[1][6].i8, |
| _src[1][7].i8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| const struct int16_vec src0 = { |
| _src[0][0].i16, |
| _src[0][1].i16, |
| _src[0][2].i16, |
| _src[0][3].i16, |
| _src[0][4].i16, |
| _src[0][5].i16, |
| _src[0][6].i16, |
| _src[0][7].i16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int16_vec src1 = { |
| _src[1][0].i16, |
| _src[1][1].i16, |
| _src[1][2].i16, |
| _src[1][3].i16, |
| _src[1][4].i16, |
| _src[1][5].i16, |
| _src[1][6].i16, |
| _src[1][7].i16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct int32_vec src0 = { |
| _src[0][0].i32, |
| _src[0][1].i32, |
| _src[0][2].i32, |
| _src[0][3].i32, |
| _src[0][4].i32, |
| _src[0][5].i32, |
| _src[0][6].i32, |
| _src[0][7].i32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int32_vec src1 = { |
| _src[1][0].i32, |
| _src[1][1].i32, |
| _src[1][2].i32, |
| _src[1][3].i32, |
| _src[1][4].i32, |
| _src[1][5].i32, |
| _src[1][6].i32, |
| _src[1][7].i32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct int64_vec src0 = { |
| _src[0][0].i64, |
| _src[0][1].i64, |
| _src[0][2].i64, |
| _src[0][3].i64, |
| _src[0][4].i64, |
| _src[0][5].i64, |
| _src[0][6].i64, |
| _src[0][7].i64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int64_vec src1 = { |
| _src[1][0].i64, |
| _src[1][1].i64, |
| _src[1][2].i64, |
| _src[1][3].i64, |
| _src[1][4].i64, |
| _src[1][5].i64, |
| _src[1][6].i64, |
| _src[1][7].i64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b32any_fnequal16(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| const struct float16_vec src0 = { |
| _mesa_half_to_float(_src[0][0].u16), |
| _mesa_half_to_float(_src[0][1].u16), |
| _mesa_half_to_float(_src[0][2].u16), |
| _mesa_half_to_float(_src[0][3].u16), |
| _mesa_half_to_float(_src[0][4].u16), |
| _mesa_half_to_float(_src[0][5].u16), |
| _mesa_half_to_float(_src[0][6].u16), |
| _mesa_half_to_float(_src[0][7].u16), |
| _mesa_half_to_float(_src[0][8].u16), |
| _mesa_half_to_float(_src[0][9].u16), |
| _mesa_half_to_float(_src[0][10].u16), |
| _mesa_half_to_float(_src[0][11].u16), |
| _mesa_half_to_float(_src[0][12].u16), |
| _mesa_half_to_float(_src[0][13].u16), |
| _mesa_half_to_float(_src[0][14].u16), |
| _mesa_half_to_float(_src[0][15].u16), |
| }; |
| |
| const struct float16_vec src1 = { |
| _mesa_half_to_float(_src[1][0].u16), |
| _mesa_half_to_float(_src[1][1].u16), |
| _mesa_half_to_float(_src[1][2].u16), |
| _mesa_half_to_float(_src[1][3].u16), |
| _mesa_half_to_float(_src[1][4].u16), |
| _mesa_half_to_float(_src[1][5].u16), |
| _mesa_half_to_float(_src[1][6].u16), |
| _mesa_half_to_float(_src[1][7].u16), |
| _mesa_half_to_float(_src[1][8].u16), |
| _mesa_half_to_float(_src[1][9].u16), |
| _mesa_half_to_float(_src[1][10].u16), |
| _mesa_half_to_float(_src[1][11].u16), |
| _mesa_half_to_float(_src[1][12].u16), |
| _mesa_half_to_float(_src[1][13].u16), |
| _mesa_half_to_float(_src[1][14].u16), |
| _mesa_half_to_float(_src[1][15].u16), |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p != src1.p) || (src0.o != src1.o) || (src0.n != src1.n) || (src0.m != src1.m) || (src0.l != src1.l) || (src0.k != src1.k) || (src0.j != src1.j) || (src0.i != src1.i) || (src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| _src[0][2].f32, |
| _src[0][3].f32, |
| _src[0][4].f32, |
| _src[0][5].f32, |
| _src[0][6].f32, |
| _src[0][7].f32, |
| _src[0][8].f32, |
| _src[0][9].f32, |
| _src[0][10].f32, |
| _src[0][11].f32, |
| _src[0][12].f32, |
| _src[0][13].f32, |
| _src[0][14].f32, |
| _src[0][15].f32, |
| }; |
| |
| const struct float32_vec src1 = { |
| _src[1][0].f32, |
| _src[1][1].f32, |
| _src[1][2].f32, |
| _src[1][3].f32, |
| _src[1][4].f32, |
| _src[1][5].f32, |
| _src[1][6].f32, |
| _src[1][7].f32, |
| _src[1][8].f32, |
| _src[1][9].f32, |
| _src[1][10].f32, |
| _src[1][11].f32, |
| _src[1][12].f32, |
| _src[1][13].f32, |
| _src[1][14].f32, |
| _src[1][15].f32, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p != src1.p) || (src0.o != src1.o) || (src0.n != src1.n) || (src0.m != src1.m) || (src0.l != src1.l) || (src0.k != src1.k) || (src0.j != src1.j) || (src0.i != src1.i) || (src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct float64_vec src0 = { |
| _src[0][0].f64, |
| _src[0][1].f64, |
| _src[0][2].f64, |
| _src[0][3].f64, |
| _src[0][4].f64, |
| _src[0][5].f64, |
| _src[0][6].f64, |
| _src[0][7].f64, |
| _src[0][8].f64, |
| _src[0][9].f64, |
| _src[0][10].f64, |
| _src[0][11].f64, |
| _src[0][12].f64, |
| _src[0][13].f64, |
| _src[0][14].f64, |
| _src[0][15].f64, |
| }; |
| |
| const struct float64_vec src1 = { |
| _src[1][0].f64, |
| _src[1][1].f64, |
| _src[1][2].f64, |
| _src[1][3].f64, |
| _src[1][4].f64, |
| _src[1][5].f64, |
| _src[1][6].f64, |
| _src[1][7].f64, |
| _src[1][8].f64, |
| _src[1][9].f64, |
| _src[1][10].f64, |
| _src[1][11].f64, |
| _src[1][12].f64, |
| _src[1][13].f64, |
| _src[1][14].f64, |
| _src[1][15].f64, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p != src1.p) || (src0.o != src1.o) || (src0.n != src1.n) || (src0.m != src1.m) || (src0.l != src1.l) || (src0.k != src1.k) || (src0.j != src1.j) || (src0.i != src1.i) || (src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b32any_fnequal2(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| const struct float16_vec src0 = { |
| _mesa_half_to_float(_src[0][0].u16), |
| _mesa_half_to_float(_src[0][1].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float16_vec src1 = { |
| _mesa_half_to_float(_src[1][0].u16), |
| _mesa_half_to_float(_src[1][1].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float32_vec src1 = { |
| _src[1][0].f32, |
| _src[1][1].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct float64_vec src0 = { |
| _src[0][0].f64, |
| _src[0][1].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float64_vec src1 = { |
| _src[1][0].f64, |
| _src[1][1].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b32any_fnequal3(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| const struct float16_vec src0 = { |
| _mesa_half_to_float(_src[0][0].u16), |
| _mesa_half_to_float(_src[0][1].u16), |
| _mesa_half_to_float(_src[0][2].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float16_vec src1 = { |
| _mesa_half_to_float(_src[1][0].u16), |
| _mesa_half_to_float(_src[1][1].u16), |
| _mesa_half_to_float(_src[1][2].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| _src[0][2].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float32_vec src1 = { |
| _src[1][0].f32, |
| _src[1][1].f32, |
| _src[1][2].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct float64_vec src0 = { |
| _src[0][0].f64, |
| _src[0][1].f64, |
| _src[0][2].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float64_vec src1 = { |
| _src[1][0].f64, |
| _src[1][1].f64, |
| _src[1][2].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b32any_fnequal4(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| const struct float16_vec src0 = { |
| _mesa_half_to_float(_src[0][0].u16), |
| _mesa_half_to_float(_src[0][1].u16), |
| _mesa_half_to_float(_src[0][2].u16), |
| _mesa_half_to_float(_src[0][3].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float16_vec src1 = { |
| _mesa_half_to_float(_src[1][0].u16), |
| _mesa_half_to_float(_src[1][1].u16), |
| _mesa_half_to_float(_src[1][2].u16), |
| _mesa_half_to_float(_src[1][3].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| _src[0][2].f32, |
| _src[0][3].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float32_vec src1 = { |
| _src[1][0].f32, |
| _src[1][1].f32, |
| _src[1][2].f32, |
| _src[1][3].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct float64_vec src0 = { |
| _src[0][0].f64, |
| _src[0][1].f64, |
| _src[0][2].f64, |
| _src[0][3].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float64_vec src1 = { |
| _src[1][0].f64, |
| _src[1][1].f64, |
| _src[1][2].f64, |
| _src[1][3].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b32any_fnequal8(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| const struct float16_vec src0 = { |
| _mesa_half_to_float(_src[0][0].u16), |
| _mesa_half_to_float(_src[0][1].u16), |
| _mesa_half_to_float(_src[0][2].u16), |
| _mesa_half_to_float(_src[0][3].u16), |
| _mesa_half_to_float(_src[0][4].u16), |
| _mesa_half_to_float(_src[0][5].u16), |
| _mesa_half_to_float(_src[0][6].u16), |
| _mesa_half_to_float(_src[0][7].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float16_vec src1 = { |
| _mesa_half_to_float(_src[1][0].u16), |
| _mesa_half_to_float(_src[1][1].u16), |
| _mesa_half_to_float(_src[1][2].u16), |
| _mesa_half_to_float(_src[1][3].u16), |
| _mesa_half_to_float(_src[1][4].u16), |
| _mesa_half_to_float(_src[1][5].u16), |
| _mesa_half_to_float(_src[1][6].u16), |
| _mesa_half_to_float(_src[1][7].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| _src[0][2].f32, |
| _src[0][3].f32, |
| _src[0][4].f32, |
| _src[0][5].f32, |
| _src[0][6].f32, |
| _src[0][7].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float32_vec src1 = { |
| _src[1][0].f32, |
| _src[1][1].f32, |
| _src[1][2].f32, |
| _src[1][3].f32, |
| _src[1][4].f32, |
| _src[1][5].f32, |
| _src[1][6].f32, |
| _src[1][7].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct float64_vec src0 = { |
| _src[0][0].f64, |
| _src[0][1].f64, |
| _src[0][2].f64, |
| _src[0][3].f64, |
| _src[0][4].f64, |
| _src[0][5].f64, |
| _src[0][6].f64, |
| _src[0][7].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float64_vec src1 = { |
| _src[1][0].f64, |
| _src[1][1].f64, |
| _src[1][2].f64, |
| _src[1][3].f64, |
| _src[1][4].f64, |
| _src[1][5].f64, |
| _src[1][6].f64, |
| _src[1][7].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b32any_inequal16(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| const struct int1_vec src0 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][1].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][2].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][3].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][4].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][5].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][6].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][7].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][8].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][9].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][10].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][11].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][12].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][13].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][14].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][15].b, |
| }; |
| |
| const struct int1_vec src1 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][1].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][2].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][3].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][4].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][5].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][6].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][7].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][8].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][9].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][10].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][11].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][12].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][13].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][14].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][15].b, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p != src1.p) || (src0.o != src1.o) || (src0.n != src1.n) || (src0.m != src1.m) || (src0.l != src1.l) || (src0.k != src1.k) || (src0.j != src1.j) || (src0.i != src1.i) || (src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| const struct int8_vec src0 = { |
| _src[0][0].i8, |
| _src[0][1].i8, |
| _src[0][2].i8, |
| _src[0][3].i8, |
| _src[0][4].i8, |
| _src[0][5].i8, |
| _src[0][6].i8, |
| _src[0][7].i8, |
| _src[0][8].i8, |
| _src[0][9].i8, |
| _src[0][10].i8, |
| _src[0][11].i8, |
| _src[0][12].i8, |
| _src[0][13].i8, |
| _src[0][14].i8, |
| _src[0][15].i8, |
| }; |
| |
| const struct int8_vec src1 = { |
| _src[1][0].i8, |
| _src[1][1].i8, |
| _src[1][2].i8, |
| _src[1][3].i8, |
| _src[1][4].i8, |
| _src[1][5].i8, |
| _src[1][6].i8, |
| _src[1][7].i8, |
| _src[1][8].i8, |
| _src[1][9].i8, |
| _src[1][10].i8, |
| _src[1][11].i8, |
| _src[1][12].i8, |
| _src[1][13].i8, |
| _src[1][14].i8, |
| _src[1][15].i8, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p != src1.p) || (src0.o != src1.o) || (src0.n != src1.n) || (src0.m != src1.m) || (src0.l != src1.l) || (src0.k != src1.k) || (src0.j != src1.j) || (src0.i != src1.i) || (src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| const struct int16_vec src0 = { |
| _src[0][0].i16, |
| _src[0][1].i16, |
| _src[0][2].i16, |
| _src[0][3].i16, |
| _src[0][4].i16, |
| _src[0][5].i16, |
| _src[0][6].i16, |
| _src[0][7].i16, |
| _src[0][8].i16, |
| _src[0][9].i16, |
| _src[0][10].i16, |
| _src[0][11].i16, |
| _src[0][12].i16, |
| _src[0][13].i16, |
| _src[0][14].i16, |
| _src[0][15].i16, |
| }; |
| |
| const struct int16_vec src1 = { |
| _src[1][0].i16, |
| _src[1][1].i16, |
| _src[1][2].i16, |
| _src[1][3].i16, |
| _src[1][4].i16, |
| _src[1][5].i16, |
| _src[1][6].i16, |
| _src[1][7].i16, |
| _src[1][8].i16, |
| _src[1][9].i16, |
| _src[1][10].i16, |
| _src[1][11].i16, |
| _src[1][12].i16, |
| _src[1][13].i16, |
| _src[1][14].i16, |
| _src[1][15].i16, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p != src1.p) || (src0.o != src1.o) || (src0.n != src1.n) || (src0.m != src1.m) || (src0.l != src1.l) || (src0.k != src1.k) || (src0.j != src1.j) || (src0.i != src1.i) || (src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct int32_vec src0 = { |
| _src[0][0].i32, |
| _src[0][1].i32, |
| _src[0][2].i32, |
| _src[0][3].i32, |
| _src[0][4].i32, |
| _src[0][5].i32, |
| _src[0][6].i32, |
| _src[0][7].i32, |
| _src[0][8].i32, |
| _src[0][9].i32, |
| _src[0][10].i32, |
| _src[0][11].i32, |
| _src[0][12].i32, |
| _src[0][13].i32, |
| _src[0][14].i32, |
| _src[0][15].i32, |
| }; |
| |
| const struct int32_vec src1 = { |
| _src[1][0].i32, |
| _src[1][1].i32, |
| _src[1][2].i32, |
| _src[1][3].i32, |
| _src[1][4].i32, |
| _src[1][5].i32, |
| _src[1][6].i32, |
| _src[1][7].i32, |
| _src[1][8].i32, |
| _src[1][9].i32, |
| _src[1][10].i32, |
| _src[1][11].i32, |
| _src[1][12].i32, |
| _src[1][13].i32, |
| _src[1][14].i32, |
| _src[1][15].i32, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p != src1.p) || (src0.o != src1.o) || (src0.n != src1.n) || (src0.m != src1.m) || (src0.l != src1.l) || (src0.k != src1.k) || (src0.j != src1.j) || (src0.i != src1.i) || (src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct int64_vec src0 = { |
| _src[0][0].i64, |
| _src[0][1].i64, |
| _src[0][2].i64, |
| _src[0][3].i64, |
| _src[0][4].i64, |
| _src[0][5].i64, |
| _src[0][6].i64, |
| _src[0][7].i64, |
| _src[0][8].i64, |
| _src[0][9].i64, |
| _src[0][10].i64, |
| _src[0][11].i64, |
| _src[0][12].i64, |
| _src[0][13].i64, |
| _src[0][14].i64, |
| _src[0][15].i64, |
| }; |
| |
| const struct int64_vec src1 = { |
| _src[1][0].i64, |
| _src[1][1].i64, |
| _src[1][2].i64, |
| _src[1][3].i64, |
| _src[1][4].i64, |
| _src[1][5].i64, |
| _src[1][6].i64, |
| _src[1][7].i64, |
| _src[1][8].i64, |
| _src[1][9].i64, |
| _src[1][10].i64, |
| _src[1][11].i64, |
| _src[1][12].i64, |
| _src[1][13].i64, |
| _src[1][14].i64, |
| _src[1][15].i64, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p != src1.p) || (src0.o != src1.o) || (src0.n != src1.n) || (src0.m != src1.m) || (src0.l != src1.l) || (src0.k != src1.k) || (src0.j != src1.j) || (src0.i != src1.i) || (src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b32any_inequal2(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| const struct int1_vec src0 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][1].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int1_vec src1 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][1].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| const struct int8_vec src0 = { |
| _src[0][0].i8, |
| _src[0][1].i8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int8_vec src1 = { |
| _src[1][0].i8, |
| _src[1][1].i8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| const struct int16_vec src0 = { |
| _src[0][0].i16, |
| _src[0][1].i16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int16_vec src1 = { |
| _src[1][0].i16, |
| _src[1][1].i16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct int32_vec src0 = { |
| _src[0][0].i32, |
| _src[0][1].i32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int32_vec src1 = { |
| _src[1][0].i32, |
| _src[1][1].i32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct int64_vec src0 = { |
| _src[0][0].i64, |
| _src[0][1].i64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int64_vec src1 = { |
| _src[1][0].i64, |
| _src[1][1].i64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b32any_inequal3(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| const struct int1_vec src0 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][1].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][2].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int1_vec src1 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][1].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][2].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| const struct int8_vec src0 = { |
| _src[0][0].i8, |
| _src[0][1].i8, |
| _src[0][2].i8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int8_vec src1 = { |
| _src[1][0].i8, |
| _src[1][1].i8, |
| _src[1][2].i8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| const struct int16_vec src0 = { |
| _src[0][0].i16, |
| _src[0][1].i16, |
| _src[0][2].i16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int16_vec src1 = { |
| _src[1][0].i16, |
| _src[1][1].i16, |
| _src[1][2].i16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct int32_vec src0 = { |
| _src[0][0].i32, |
| _src[0][1].i32, |
| _src[0][2].i32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int32_vec src1 = { |
| _src[1][0].i32, |
| _src[1][1].i32, |
| _src[1][2].i32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct int64_vec src0 = { |
| _src[0][0].i64, |
| _src[0][1].i64, |
| _src[0][2].i64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int64_vec src1 = { |
| _src[1][0].i64, |
| _src[1][1].i64, |
| _src[1][2].i64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b32any_inequal4(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| const struct int1_vec src0 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][1].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][2].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][3].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int1_vec src1 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][1].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][2].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][3].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| const struct int8_vec src0 = { |
| _src[0][0].i8, |
| _src[0][1].i8, |
| _src[0][2].i8, |
| _src[0][3].i8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int8_vec src1 = { |
| _src[1][0].i8, |
| _src[1][1].i8, |
| _src[1][2].i8, |
| _src[1][3].i8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| const struct int16_vec src0 = { |
| _src[0][0].i16, |
| _src[0][1].i16, |
| _src[0][2].i16, |
| _src[0][3].i16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int16_vec src1 = { |
| _src[1][0].i16, |
| _src[1][1].i16, |
| _src[1][2].i16, |
| _src[1][3].i16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct int32_vec src0 = { |
| _src[0][0].i32, |
| _src[0][1].i32, |
| _src[0][2].i32, |
| _src[0][3].i32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int32_vec src1 = { |
| _src[1][0].i32, |
| _src[1][1].i32, |
| _src[1][2].i32, |
| _src[1][3].i32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct int64_vec src0 = { |
| _src[0][0].i64, |
| _src[0][1].i64, |
| _src[0][2].i64, |
| _src[0][3].i64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int64_vec src1 = { |
| _src[1][0].i64, |
| _src[1][1].i64, |
| _src[1][2].i64, |
| _src[1][3].i64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b32any_inequal8(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| const struct int1_vec src0 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][1].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][2].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][3].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][4].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][5].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][6].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][7].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int1_vec src1 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][1].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][2].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][3].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][4].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][5].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][6].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][7].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| const struct int8_vec src0 = { |
| _src[0][0].i8, |
| _src[0][1].i8, |
| _src[0][2].i8, |
| _src[0][3].i8, |
| _src[0][4].i8, |
| _src[0][5].i8, |
| _src[0][6].i8, |
| _src[0][7].i8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int8_vec src1 = { |
| _src[1][0].i8, |
| _src[1][1].i8, |
| _src[1][2].i8, |
| _src[1][3].i8, |
| _src[1][4].i8, |
| _src[1][5].i8, |
| _src[1][6].i8, |
| _src[1][7].i8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| const struct int16_vec src0 = { |
| _src[0][0].i16, |
| _src[0][1].i16, |
| _src[0][2].i16, |
| _src[0][3].i16, |
| _src[0][4].i16, |
| _src[0][5].i16, |
| _src[0][6].i16, |
| _src[0][7].i16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int16_vec src1 = { |
| _src[1][0].i16, |
| _src[1][1].i16, |
| _src[1][2].i16, |
| _src[1][3].i16, |
| _src[1][4].i16, |
| _src[1][5].i16, |
| _src[1][6].i16, |
| _src[1][7].i16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct int32_vec src0 = { |
| _src[0][0].i32, |
| _src[0][1].i32, |
| _src[0][2].i32, |
| _src[0][3].i32, |
| _src[0][4].i32, |
| _src[0][5].i32, |
| _src[0][6].i32, |
| _src[0][7].i32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int32_vec src1 = { |
| _src[1][0].i32, |
| _src[1][1].i32, |
| _src[1][2].i32, |
| _src[1][3].i32, |
| _src[1][4].i32, |
| _src[1][5].i32, |
| _src[1][6].i32, |
| _src[1][7].i32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct int64_vec src0 = { |
| _src[0][0].i64, |
| _src[0][1].i64, |
| _src[0][2].i64, |
| _src[0][3].i64, |
| _src[0][4].i64, |
| _src[0][5].i64, |
| _src[0][6].i64, |
| _src[0][7].i64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int64_vec src1 = { |
| _src[1][0].i64, |
| _src[1][1].i64, |
| _src[1][2].i64, |
| _src[1][3].i64, |
| _src[1][4].i64, |
| _src[1][5].i64, |
| _src[1][6].i64, |
| _src[1][7].i64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i32 = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b32csel(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool32_t src0 = |
| _src[0][_i].i32; |
| const uint1_t src1 = |
| _src[1][_i].b; |
| const uint1_t src2 = |
| _src[2][_i].b; |
| |
| uint1_t dst = src0 ? src1 : src2; |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[_i].b = dst & 1; |
| |
| } |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool32_t src0 = |
| _src[0][_i].i32; |
| const uint8_t src1 = |
| _src[1][_i].u8; |
| const uint8_t src2 = |
| _src[2][_i].u8; |
| |
| uint8_t dst = src0 ? src1 : src2; |
| |
| _dst_val[_i].u8 = dst; |
| |
| } |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool32_t src0 = |
| _src[0][_i].i32; |
| const uint16_t src1 = |
| _src[1][_i].u16; |
| const uint16_t src2 = |
| _src[2][_i].u16; |
| |
| uint16_t dst = src0 ? src1 : src2; |
| |
| _dst_val[_i].u16 = dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool32_t src0 = |
| _src[0][_i].i32; |
| const uint32_t src1 = |
| _src[1][_i].u32; |
| const uint32_t src2 = |
| _src[2][_i].u32; |
| |
| uint32_t dst = src0 ? src1 : src2; |
| |
| _dst_val[_i].u32 = dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool32_t src0 = |
| _src[0][_i].i32; |
| const uint64_t src1 = |
| _src[1][_i].u64; |
| const uint64_t src2 = |
| _src[2][_i].u64; |
| |
| uint64_t dst = src0 ? src1 : src2; |
| |
| _dst_val[_i].u64 = dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b8all_fequal16(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| const struct float16_vec src0 = { |
| _mesa_half_to_float(_src[0][0].u16), |
| _mesa_half_to_float(_src[0][1].u16), |
| _mesa_half_to_float(_src[0][2].u16), |
| _mesa_half_to_float(_src[0][3].u16), |
| _mesa_half_to_float(_src[0][4].u16), |
| _mesa_half_to_float(_src[0][5].u16), |
| _mesa_half_to_float(_src[0][6].u16), |
| _mesa_half_to_float(_src[0][7].u16), |
| _mesa_half_to_float(_src[0][8].u16), |
| _mesa_half_to_float(_src[0][9].u16), |
| _mesa_half_to_float(_src[0][10].u16), |
| _mesa_half_to_float(_src[0][11].u16), |
| _mesa_half_to_float(_src[0][12].u16), |
| _mesa_half_to_float(_src[0][13].u16), |
| _mesa_half_to_float(_src[0][14].u16), |
| _mesa_half_to_float(_src[0][15].u16), |
| }; |
| |
| const struct float16_vec src1 = { |
| _mesa_half_to_float(_src[1][0].u16), |
| _mesa_half_to_float(_src[1][1].u16), |
| _mesa_half_to_float(_src[1][2].u16), |
| _mesa_half_to_float(_src[1][3].u16), |
| _mesa_half_to_float(_src[1][4].u16), |
| _mesa_half_to_float(_src[1][5].u16), |
| _mesa_half_to_float(_src[1][6].u16), |
| _mesa_half_to_float(_src[1][7].u16), |
| _mesa_half_to_float(_src[1][8].u16), |
| _mesa_half_to_float(_src[1][9].u16), |
| _mesa_half_to_float(_src[1][10].u16), |
| _mesa_half_to_float(_src[1][11].u16), |
| _mesa_half_to_float(_src[1][12].u16), |
| _mesa_half_to_float(_src[1][13].u16), |
| _mesa_half_to_float(_src[1][14].u16), |
| _mesa_half_to_float(_src[1][15].u16), |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p == src1.p) && (src0.o == src1.o) && (src0.n == src1.n) && (src0.m == src1.m) && (src0.l == src1.l) && (src0.k == src1.k) && (src0.j == src1.j) && (src0.i == src1.i) && (src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| _src[0][2].f32, |
| _src[0][3].f32, |
| _src[0][4].f32, |
| _src[0][5].f32, |
| _src[0][6].f32, |
| _src[0][7].f32, |
| _src[0][8].f32, |
| _src[0][9].f32, |
| _src[0][10].f32, |
| _src[0][11].f32, |
| _src[0][12].f32, |
| _src[0][13].f32, |
| _src[0][14].f32, |
| _src[0][15].f32, |
| }; |
| |
| const struct float32_vec src1 = { |
| _src[1][0].f32, |
| _src[1][1].f32, |
| _src[1][2].f32, |
| _src[1][3].f32, |
| _src[1][4].f32, |
| _src[1][5].f32, |
| _src[1][6].f32, |
| _src[1][7].f32, |
| _src[1][8].f32, |
| _src[1][9].f32, |
| _src[1][10].f32, |
| _src[1][11].f32, |
| _src[1][12].f32, |
| _src[1][13].f32, |
| _src[1][14].f32, |
| _src[1][15].f32, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p == src1.p) && (src0.o == src1.o) && (src0.n == src1.n) && (src0.m == src1.m) && (src0.l == src1.l) && (src0.k == src1.k) && (src0.j == src1.j) && (src0.i == src1.i) && (src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct float64_vec src0 = { |
| _src[0][0].f64, |
| _src[0][1].f64, |
| _src[0][2].f64, |
| _src[0][3].f64, |
| _src[0][4].f64, |
| _src[0][5].f64, |
| _src[0][6].f64, |
| _src[0][7].f64, |
| _src[0][8].f64, |
| _src[0][9].f64, |
| _src[0][10].f64, |
| _src[0][11].f64, |
| _src[0][12].f64, |
| _src[0][13].f64, |
| _src[0][14].f64, |
| _src[0][15].f64, |
| }; |
| |
| const struct float64_vec src1 = { |
| _src[1][0].f64, |
| _src[1][1].f64, |
| _src[1][2].f64, |
| _src[1][3].f64, |
| _src[1][4].f64, |
| _src[1][5].f64, |
| _src[1][6].f64, |
| _src[1][7].f64, |
| _src[1][8].f64, |
| _src[1][9].f64, |
| _src[1][10].f64, |
| _src[1][11].f64, |
| _src[1][12].f64, |
| _src[1][13].f64, |
| _src[1][14].f64, |
| _src[1][15].f64, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p == src1.p) && (src0.o == src1.o) && (src0.n == src1.n) && (src0.m == src1.m) && (src0.l == src1.l) && (src0.k == src1.k) && (src0.j == src1.j) && (src0.i == src1.i) && (src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b8all_fequal2(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| const struct float16_vec src0 = { |
| _mesa_half_to_float(_src[0][0].u16), |
| _mesa_half_to_float(_src[0][1].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float16_vec src1 = { |
| _mesa_half_to_float(_src[1][0].u16), |
| _mesa_half_to_float(_src[1][1].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float32_vec src1 = { |
| _src[1][0].f32, |
| _src[1][1].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct float64_vec src0 = { |
| _src[0][0].f64, |
| _src[0][1].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float64_vec src1 = { |
| _src[1][0].f64, |
| _src[1][1].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b8all_fequal3(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| const struct float16_vec src0 = { |
| _mesa_half_to_float(_src[0][0].u16), |
| _mesa_half_to_float(_src[0][1].u16), |
| _mesa_half_to_float(_src[0][2].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float16_vec src1 = { |
| _mesa_half_to_float(_src[1][0].u16), |
| _mesa_half_to_float(_src[1][1].u16), |
| _mesa_half_to_float(_src[1][2].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| _src[0][2].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float32_vec src1 = { |
| _src[1][0].f32, |
| _src[1][1].f32, |
| _src[1][2].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct float64_vec src0 = { |
| _src[0][0].f64, |
| _src[0][1].f64, |
| _src[0][2].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float64_vec src1 = { |
| _src[1][0].f64, |
| _src[1][1].f64, |
| _src[1][2].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b8all_fequal4(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| const struct float16_vec src0 = { |
| _mesa_half_to_float(_src[0][0].u16), |
| _mesa_half_to_float(_src[0][1].u16), |
| _mesa_half_to_float(_src[0][2].u16), |
| _mesa_half_to_float(_src[0][3].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float16_vec src1 = { |
| _mesa_half_to_float(_src[1][0].u16), |
| _mesa_half_to_float(_src[1][1].u16), |
| _mesa_half_to_float(_src[1][2].u16), |
| _mesa_half_to_float(_src[1][3].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| _src[0][2].f32, |
| _src[0][3].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float32_vec src1 = { |
| _src[1][0].f32, |
| _src[1][1].f32, |
| _src[1][2].f32, |
| _src[1][3].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct float64_vec src0 = { |
| _src[0][0].f64, |
| _src[0][1].f64, |
| _src[0][2].f64, |
| _src[0][3].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float64_vec src1 = { |
| _src[1][0].f64, |
| _src[1][1].f64, |
| _src[1][2].f64, |
| _src[1][3].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b8all_fequal8(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| const struct float16_vec src0 = { |
| _mesa_half_to_float(_src[0][0].u16), |
| _mesa_half_to_float(_src[0][1].u16), |
| _mesa_half_to_float(_src[0][2].u16), |
| _mesa_half_to_float(_src[0][3].u16), |
| _mesa_half_to_float(_src[0][4].u16), |
| _mesa_half_to_float(_src[0][5].u16), |
| _mesa_half_to_float(_src[0][6].u16), |
| _mesa_half_to_float(_src[0][7].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float16_vec src1 = { |
| _mesa_half_to_float(_src[1][0].u16), |
| _mesa_half_to_float(_src[1][1].u16), |
| _mesa_half_to_float(_src[1][2].u16), |
| _mesa_half_to_float(_src[1][3].u16), |
| _mesa_half_to_float(_src[1][4].u16), |
| _mesa_half_to_float(_src[1][5].u16), |
| _mesa_half_to_float(_src[1][6].u16), |
| _mesa_half_to_float(_src[1][7].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| _src[0][2].f32, |
| _src[0][3].f32, |
| _src[0][4].f32, |
| _src[0][5].f32, |
| _src[0][6].f32, |
| _src[0][7].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float32_vec src1 = { |
| _src[1][0].f32, |
| _src[1][1].f32, |
| _src[1][2].f32, |
| _src[1][3].f32, |
| _src[1][4].f32, |
| _src[1][5].f32, |
| _src[1][6].f32, |
| _src[1][7].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct float64_vec src0 = { |
| _src[0][0].f64, |
| _src[0][1].f64, |
| _src[0][2].f64, |
| _src[0][3].f64, |
| _src[0][4].f64, |
| _src[0][5].f64, |
| _src[0][6].f64, |
| _src[0][7].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float64_vec src1 = { |
| _src[1][0].f64, |
| _src[1][1].f64, |
| _src[1][2].f64, |
| _src[1][3].f64, |
| _src[1][4].f64, |
| _src[1][5].f64, |
| _src[1][6].f64, |
| _src[1][7].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b8all_iequal16(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| const struct int1_vec src0 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][1].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][2].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][3].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][4].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][5].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][6].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][7].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][8].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][9].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][10].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][11].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][12].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][13].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][14].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][15].b, |
| }; |
| |
| const struct int1_vec src1 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][1].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][2].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][3].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][4].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][5].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][6].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][7].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][8].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][9].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][10].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][11].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][12].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][13].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][14].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][15].b, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p == src1.p) && (src0.o == src1.o) && (src0.n == src1.n) && (src0.m == src1.m) && (src0.l == src1.l) && (src0.k == src1.k) && (src0.j == src1.j) && (src0.i == src1.i) && (src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| const struct int8_vec src0 = { |
| _src[0][0].i8, |
| _src[0][1].i8, |
| _src[0][2].i8, |
| _src[0][3].i8, |
| _src[0][4].i8, |
| _src[0][5].i8, |
| _src[0][6].i8, |
| _src[0][7].i8, |
| _src[0][8].i8, |
| _src[0][9].i8, |
| _src[0][10].i8, |
| _src[0][11].i8, |
| _src[0][12].i8, |
| _src[0][13].i8, |
| _src[0][14].i8, |
| _src[0][15].i8, |
| }; |
| |
| const struct int8_vec src1 = { |
| _src[1][0].i8, |
| _src[1][1].i8, |
| _src[1][2].i8, |
| _src[1][3].i8, |
| _src[1][4].i8, |
| _src[1][5].i8, |
| _src[1][6].i8, |
| _src[1][7].i8, |
| _src[1][8].i8, |
| _src[1][9].i8, |
| _src[1][10].i8, |
| _src[1][11].i8, |
| _src[1][12].i8, |
| _src[1][13].i8, |
| _src[1][14].i8, |
| _src[1][15].i8, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p == src1.p) && (src0.o == src1.o) && (src0.n == src1.n) && (src0.m == src1.m) && (src0.l == src1.l) && (src0.k == src1.k) && (src0.j == src1.j) && (src0.i == src1.i) && (src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| const struct int16_vec src0 = { |
| _src[0][0].i16, |
| _src[0][1].i16, |
| _src[0][2].i16, |
| _src[0][3].i16, |
| _src[0][4].i16, |
| _src[0][5].i16, |
| _src[0][6].i16, |
| _src[0][7].i16, |
| _src[0][8].i16, |
| _src[0][9].i16, |
| _src[0][10].i16, |
| _src[0][11].i16, |
| _src[0][12].i16, |
| _src[0][13].i16, |
| _src[0][14].i16, |
| _src[0][15].i16, |
| }; |
| |
| const struct int16_vec src1 = { |
| _src[1][0].i16, |
| _src[1][1].i16, |
| _src[1][2].i16, |
| _src[1][3].i16, |
| _src[1][4].i16, |
| _src[1][5].i16, |
| _src[1][6].i16, |
| _src[1][7].i16, |
| _src[1][8].i16, |
| _src[1][9].i16, |
| _src[1][10].i16, |
| _src[1][11].i16, |
| _src[1][12].i16, |
| _src[1][13].i16, |
| _src[1][14].i16, |
| _src[1][15].i16, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p == src1.p) && (src0.o == src1.o) && (src0.n == src1.n) && (src0.m == src1.m) && (src0.l == src1.l) && (src0.k == src1.k) && (src0.j == src1.j) && (src0.i == src1.i) && (src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct int32_vec src0 = { |
| _src[0][0].i32, |
| _src[0][1].i32, |
| _src[0][2].i32, |
| _src[0][3].i32, |
| _src[0][4].i32, |
| _src[0][5].i32, |
| _src[0][6].i32, |
| _src[0][7].i32, |
| _src[0][8].i32, |
| _src[0][9].i32, |
| _src[0][10].i32, |
| _src[0][11].i32, |
| _src[0][12].i32, |
| _src[0][13].i32, |
| _src[0][14].i32, |
| _src[0][15].i32, |
| }; |
| |
| const struct int32_vec src1 = { |
| _src[1][0].i32, |
| _src[1][1].i32, |
| _src[1][2].i32, |
| _src[1][3].i32, |
| _src[1][4].i32, |
| _src[1][5].i32, |
| _src[1][6].i32, |
| _src[1][7].i32, |
| _src[1][8].i32, |
| _src[1][9].i32, |
| _src[1][10].i32, |
| _src[1][11].i32, |
| _src[1][12].i32, |
| _src[1][13].i32, |
| _src[1][14].i32, |
| _src[1][15].i32, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p == src1.p) && (src0.o == src1.o) && (src0.n == src1.n) && (src0.m == src1.m) && (src0.l == src1.l) && (src0.k == src1.k) && (src0.j == src1.j) && (src0.i == src1.i) && (src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct int64_vec src0 = { |
| _src[0][0].i64, |
| _src[0][1].i64, |
| _src[0][2].i64, |
| _src[0][3].i64, |
| _src[0][4].i64, |
| _src[0][5].i64, |
| _src[0][6].i64, |
| _src[0][7].i64, |
| _src[0][8].i64, |
| _src[0][9].i64, |
| _src[0][10].i64, |
| _src[0][11].i64, |
| _src[0][12].i64, |
| _src[0][13].i64, |
| _src[0][14].i64, |
| _src[0][15].i64, |
| }; |
| |
| const struct int64_vec src1 = { |
| _src[1][0].i64, |
| _src[1][1].i64, |
| _src[1][2].i64, |
| _src[1][3].i64, |
| _src[1][4].i64, |
| _src[1][5].i64, |
| _src[1][6].i64, |
| _src[1][7].i64, |
| _src[1][8].i64, |
| _src[1][9].i64, |
| _src[1][10].i64, |
| _src[1][11].i64, |
| _src[1][12].i64, |
| _src[1][13].i64, |
| _src[1][14].i64, |
| _src[1][15].i64, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p == src1.p) && (src0.o == src1.o) && (src0.n == src1.n) && (src0.m == src1.m) && (src0.l == src1.l) && (src0.k == src1.k) && (src0.j == src1.j) && (src0.i == src1.i) && (src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b8all_iequal2(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| const struct int1_vec src0 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][1].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int1_vec src1 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][1].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| const struct int8_vec src0 = { |
| _src[0][0].i8, |
| _src[0][1].i8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int8_vec src1 = { |
| _src[1][0].i8, |
| _src[1][1].i8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| const struct int16_vec src0 = { |
| _src[0][0].i16, |
| _src[0][1].i16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int16_vec src1 = { |
| _src[1][0].i16, |
| _src[1][1].i16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct int32_vec src0 = { |
| _src[0][0].i32, |
| _src[0][1].i32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int32_vec src1 = { |
| _src[1][0].i32, |
| _src[1][1].i32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct int64_vec src0 = { |
| _src[0][0].i64, |
| _src[0][1].i64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int64_vec src1 = { |
| _src[1][0].i64, |
| _src[1][1].i64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b8all_iequal3(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| const struct int1_vec src0 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][1].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][2].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int1_vec src1 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][1].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][2].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| const struct int8_vec src0 = { |
| _src[0][0].i8, |
| _src[0][1].i8, |
| _src[0][2].i8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int8_vec src1 = { |
| _src[1][0].i8, |
| _src[1][1].i8, |
| _src[1][2].i8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| const struct int16_vec src0 = { |
| _src[0][0].i16, |
| _src[0][1].i16, |
| _src[0][2].i16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int16_vec src1 = { |
| _src[1][0].i16, |
| _src[1][1].i16, |
| _src[1][2].i16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct int32_vec src0 = { |
| _src[0][0].i32, |
| _src[0][1].i32, |
| _src[0][2].i32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int32_vec src1 = { |
| _src[1][0].i32, |
| _src[1][1].i32, |
| _src[1][2].i32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct int64_vec src0 = { |
| _src[0][0].i64, |
| _src[0][1].i64, |
| _src[0][2].i64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int64_vec src1 = { |
| _src[1][0].i64, |
| _src[1][1].i64, |
| _src[1][2].i64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b8all_iequal4(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| const struct int1_vec src0 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][1].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][2].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][3].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int1_vec src1 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][1].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][2].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][3].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| const struct int8_vec src0 = { |
| _src[0][0].i8, |
| _src[0][1].i8, |
| _src[0][2].i8, |
| _src[0][3].i8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int8_vec src1 = { |
| _src[1][0].i8, |
| _src[1][1].i8, |
| _src[1][2].i8, |
| _src[1][3].i8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| const struct int16_vec src0 = { |
| _src[0][0].i16, |
| _src[0][1].i16, |
| _src[0][2].i16, |
| _src[0][3].i16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int16_vec src1 = { |
| _src[1][0].i16, |
| _src[1][1].i16, |
| _src[1][2].i16, |
| _src[1][3].i16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct int32_vec src0 = { |
| _src[0][0].i32, |
| _src[0][1].i32, |
| _src[0][2].i32, |
| _src[0][3].i32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int32_vec src1 = { |
| _src[1][0].i32, |
| _src[1][1].i32, |
| _src[1][2].i32, |
| _src[1][3].i32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct int64_vec src0 = { |
| _src[0][0].i64, |
| _src[0][1].i64, |
| _src[0][2].i64, |
| _src[0][3].i64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int64_vec src1 = { |
| _src[1][0].i64, |
| _src[1][1].i64, |
| _src[1][2].i64, |
| _src[1][3].i64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b8all_iequal8(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| const struct int1_vec src0 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][1].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][2].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][3].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][4].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][5].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][6].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][7].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int1_vec src1 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][1].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][2].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][3].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][4].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][5].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][6].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][7].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| const struct int8_vec src0 = { |
| _src[0][0].i8, |
| _src[0][1].i8, |
| _src[0][2].i8, |
| _src[0][3].i8, |
| _src[0][4].i8, |
| _src[0][5].i8, |
| _src[0][6].i8, |
| _src[0][7].i8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int8_vec src1 = { |
| _src[1][0].i8, |
| _src[1][1].i8, |
| _src[1][2].i8, |
| _src[1][3].i8, |
| _src[1][4].i8, |
| _src[1][5].i8, |
| _src[1][6].i8, |
| _src[1][7].i8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| const struct int16_vec src0 = { |
| _src[0][0].i16, |
| _src[0][1].i16, |
| _src[0][2].i16, |
| _src[0][3].i16, |
| _src[0][4].i16, |
| _src[0][5].i16, |
| _src[0][6].i16, |
| _src[0][7].i16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int16_vec src1 = { |
| _src[1][0].i16, |
| _src[1][1].i16, |
| _src[1][2].i16, |
| _src[1][3].i16, |
| _src[1][4].i16, |
| _src[1][5].i16, |
| _src[1][6].i16, |
| _src[1][7].i16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct int32_vec src0 = { |
| _src[0][0].i32, |
| _src[0][1].i32, |
| _src[0][2].i32, |
| _src[0][3].i32, |
| _src[0][4].i32, |
| _src[0][5].i32, |
| _src[0][6].i32, |
| _src[0][7].i32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int32_vec src1 = { |
| _src[1][0].i32, |
| _src[1][1].i32, |
| _src[1][2].i32, |
| _src[1][3].i32, |
| _src[1][4].i32, |
| _src[1][5].i32, |
| _src[1][6].i32, |
| _src[1][7].i32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct int64_vec src0 = { |
| _src[0][0].i64, |
| _src[0][1].i64, |
| _src[0][2].i64, |
| _src[0][3].i64, |
| _src[0][4].i64, |
| _src[0][5].i64, |
| _src[0][6].i64, |
| _src[0][7].i64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int64_vec src1 = { |
| _src[1][0].i64, |
| _src[1][1].i64, |
| _src[1][2].i64, |
| _src[1][3].i64, |
| _src[1][4].i64, |
| _src[1][5].i64, |
| _src[1][6].i64, |
| _src[1][7].i64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b8any_fnequal16(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| const struct float16_vec src0 = { |
| _mesa_half_to_float(_src[0][0].u16), |
| _mesa_half_to_float(_src[0][1].u16), |
| _mesa_half_to_float(_src[0][2].u16), |
| _mesa_half_to_float(_src[0][3].u16), |
| _mesa_half_to_float(_src[0][4].u16), |
| _mesa_half_to_float(_src[0][5].u16), |
| _mesa_half_to_float(_src[0][6].u16), |
| _mesa_half_to_float(_src[0][7].u16), |
| _mesa_half_to_float(_src[0][8].u16), |
| _mesa_half_to_float(_src[0][9].u16), |
| _mesa_half_to_float(_src[0][10].u16), |
| _mesa_half_to_float(_src[0][11].u16), |
| _mesa_half_to_float(_src[0][12].u16), |
| _mesa_half_to_float(_src[0][13].u16), |
| _mesa_half_to_float(_src[0][14].u16), |
| _mesa_half_to_float(_src[0][15].u16), |
| }; |
| |
| const struct float16_vec src1 = { |
| _mesa_half_to_float(_src[1][0].u16), |
| _mesa_half_to_float(_src[1][1].u16), |
| _mesa_half_to_float(_src[1][2].u16), |
| _mesa_half_to_float(_src[1][3].u16), |
| _mesa_half_to_float(_src[1][4].u16), |
| _mesa_half_to_float(_src[1][5].u16), |
| _mesa_half_to_float(_src[1][6].u16), |
| _mesa_half_to_float(_src[1][7].u16), |
| _mesa_half_to_float(_src[1][8].u16), |
| _mesa_half_to_float(_src[1][9].u16), |
| _mesa_half_to_float(_src[1][10].u16), |
| _mesa_half_to_float(_src[1][11].u16), |
| _mesa_half_to_float(_src[1][12].u16), |
| _mesa_half_to_float(_src[1][13].u16), |
| _mesa_half_to_float(_src[1][14].u16), |
| _mesa_half_to_float(_src[1][15].u16), |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p != src1.p) || (src0.o != src1.o) || (src0.n != src1.n) || (src0.m != src1.m) || (src0.l != src1.l) || (src0.k != src1.k) || (src0.j != src1.j) || (src0.i != src1.i) || (src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| _src[0][2].f32, |
| _src[0][3].f32, |
| _src[0][4].f32, |
| _src[0][5].f32, |
| _src[0][6].f32, |
| _src[0][7].f32, |
| _src[0][8].f32, |
| _src[0][9].f32, |
| _src[0][10].f32, |
| _src[0][11].f32, |
| _src[0][12].f32, |
| _src[0][13].f32, |
| _src[0][14].f32, |
| _src[0][15].f32, |
| }; |
| |
| const struct float32_vec src1 = { |
| _src[1][0].f32, |
| _src[1][1].f32, |
| _src[1][2].f32, |
| _src[1][3].f32, |
| _src[1][4].f32, |
| _src[1][5].f32, |
| _src[1][6].f32, |
| _src[1][7].f32, |
| _src[1][8].f32, |
| _src[1][9].f32, |
| _src[1][10].f32, |
| _src[1][11].f32, |
| _src[1][12].f32, |
| _src[1][13].f32, |
| _src[1][14].f32, |
| _src[1][15].f32, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p != src1.p) || (src0.o != src1.o) || (src0.n != src1.n) || (src0.m != src1.m) || (src0.l != src1.l) || (src0.k != src1.k) || (src0.j != src1.j) || (src0.i != src1.i) || (src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct float64_vec src0 = { |
| _src[0][0].f64, |
| _src[0][1].f64, |
| _src[0][2].f64, |
| _src[0][3].f64, |
| _src[0][4].f64, |
| _src[0][5].f64, |
| _src[0][6].f64, |
| _src[0][7].f64, |
| _src[0][8].f64, |
| _src[0][9].f64, |
| _src[0][10].f64, |
| _src[0][11].f64, |
| _src[0][12].f64, |
| _src[0][13].f64, |
| _src[0][14].f64, |
| _src[0][15].f64, |
| }; |
| |
| const struct float64_vec src1 = { |
| _src[1][0].f64, |
| _src[1][1].f64, |
| _src[1][2].f64, |
| _src[1][3].f64, |
| _src[1][4].f64, |
| _src[1][5].f64, |
| _src[1][6].f64, |
| _src[1][7].f64, |
| _src[1][8].f64, |
| _src[1][9].f64, |
| _src[1][10].f64, |
| _src[1][11].f64, |
| _src[1][12].f64, |
| _src[1][13].f64, |
| _src[1][14].f64, |
| _src[1][15].f64, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p != src1.p) || (src0.o != src1.o) || (src0.n != src1.n) || (src0.m != src1.m) || (src0.l != src1.l) || (src0.k != src1.k) || (src0.j != src1.j) || (src0.i != src1.i) || (src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b8any_fnequal2(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| const struct float16_vec src0 = { |
| _mesa_half_to_float(_src[0][0].u16), |
| _mesa_half_to_float(_src[0][1].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float16_vec src1 = { |
| _mesa_half_to_float(_src[1][0].u16), |
| _mesa_half_to_float(_src[1][1].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float32_vec src1 = { |
| _src[1][0].f32, |
| _src[1][1].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct float64_vec src0 = { |
| _src[0][0].f64, |
| _src[0][1].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float64_vec src1 = { |
| _src[1][0].f64, |
| _src[1][1].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b8any_fnequal3(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| const struct float16_vec src0 = { |
| _mesa_half_to_float(_src[0][0].u16), |
| _mesa_half_to_float(_src[0][1].u16), |
| _mesa_half_to_float(_src[0][2].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float16_vec src1 = { |
| _mesa_half_to_float(_src[1][0].u16), |
| _mesa_half_to_float(_src[1][1].u16), |
| _mesa_half_to_float(_src[1][2].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| _src[0][2].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float32_vec src1 = { |
| _src[1][0].f32, |
| _src[1][1].f32, |
| _src[1][2].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct float64_vec src0 = { |
| _src[0][0].f64, |
| _src[0][1].f64, |
| _src[0][2].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float64_vec src1 = { |
| _src[1][0].f64, |
| _src[1][1].f64, |
| _src[1][2].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b8any_fnequal4(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| const struct float16_vec src0 = { |
| _mesa_half_to_float(_src[0][0].u16), |
| _mesa_half_to_float(_src[0][1].u16), |
| _mesa_half_to_float(_src[0][2].u16), |
| _mesa_half_to_float(_src[0][3].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float16_vec src1 = { |
| _mesa_half_to_float(_src[1][0].u16), |
| _mesa_half_to_float(_src[1][1].u16), |
| _mesa_half_to_float(_src[1][2].u16), |
| _mesa_half_to_float(_src[1][3].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| _src[0][2].f32, |
| _src[0][3].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float32_vec src1 = { |
| _src[1][0].f32, |
| _src[1][1].f32, |
| _src[1][2].f32, |
| _src[1][3].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct float64_vec src0 = { |
| _src[0][0].f64, |
| _src[0][1].f64, |
| _src[0][2].f64, |
| _src[0][3].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float64_vec src1 = { |
| _src[1][0].f64, |
| _src[1][1].f64, |
| _src[1][2].f64, |
| _src[1][3].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b8any_fnequal8(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| const struct float16_vec src0 = { |
| _mesa_half_to_float(_src[0][0].u16), |
| _mesa_half_to_float(_src[0][1].u16), |
| _mesa_half_to_float(_src[0][2].u16), |
| _mesa_half_to_float(_src[0][3].u16), |
| _mesa_half_to_float(_src[0][4].u16), |
| _mesa_half_to_float(_src[0][5].u16), |
| _mesa_half_to_float(_src[0][6].u16), |
| _mesa_half_to_float(_src[0][7].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float16_vec src1 = { |
| _mesa_half_to_float(_src[1][0].u16), |
| _mesa_half_to_float(_src[1][1].u16), |
| _mesa_half_to_float(_src[1][2].u16), |
| _mesa_half_to_float(_src[1][3].u16), |
| _mesa_half_to_float(_src[1][4].u16), |
| _mesa_half_to_float(_src[1][5].u16), |
| _mesa_half_to_float(_src[1][6].u16), |
| _mesa_half_to_float(_src[1][7].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| _src[0][2].f32, |
| _src[0][3].f32, |
| _src[0][4].f32, |
| _src[0][5].f32, |
| _src[0][6].f32, |
| _src[0][7].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float32_vec src1 = { |
| _src[1][0].f32, |
| _src[1][1].f32, |
| _src[1][2].f32, |
| _src[1][3].f32, |
| _src[1][4].f32, |
| _src[1][5].f32, |
| _src[1][6].f32, |
| _src[1][7].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct float64_vec src0 = { |
| _src[0][0].f64, |
| _src[0][1].f64, |
| _src[0][2].f64, |
| _src[0][3].f64, |
| _src[0][4].f64, |
| _src[0][5].f64, |
| _src[0][6].f64, |
| _src[0][7].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float64_vec src1 = { |
| _src[1][0].f64, |
| _src[1][1].f64, |
| _src[1][2].f64, |
| _src[1][3].f64, |
| _src[1][4].f64, |
| _src[1][5].f64, |
| _src[1][6].f64, |
| _src[1][7].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b8any_inequal16(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| const struct int1_vec src0 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][1].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][2].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][3].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][4].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][5].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][6].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][7].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][8].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][9].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][10].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][11].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][12].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][13].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][14].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][15].b, |
| }; |
| |
| const struct int1_vec src1 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][1].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][2].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][3].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][4].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][5].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][6].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][7].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][8].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][9].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][10].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][11].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][12].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][13].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][14].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][15].b, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p != src1.p) || (src0.o != src1.o) || (src0.n != src1.n) || (src0.m != src1.m) || (src0.l != src1.l) || (src0.k != src1.k) || (src0.j != src1.j) || (src0.i != src1.i) || (src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| const struct int8_vec src0 = { |
| _src[0][0].i8, |
| _src[0][1].i8, |
| _src[0][2].i8, |
| _src[0][3].i8, |
| _src[0][4].i8, |
| _src[0][5].i8, |
| _src[0][6].i8, |
| _src[0][7].i8, |
| _src[0][8].i8, |
| _src[0][9].i8, |
| _src[0][10].i8, |
| _src[0][11].i8, |
| _src[0][12].i8, |
| _src[0][13].i8, |
| _src[0][14].i8, |
| _src[0][15].i8, |
| }; |
| |
| const struct int8_vec src1 = { |
| _src[1][0].i8, |
| _src[1][1].i8, |
| _src[1][2].i8, |
| _src[1][3].i8, |
| _src[1][4].i8, |
| _src[1][5].i8, |
| _src[1][6].i8, |
| _src[1][7].i8, |
| _src[1][8].i8, |
| _src[1][9].i8, |
| _src[1][10].i8, |
| _src[1][11].i8, |
| _src[1][12].i8, |
| _src[1][13].i8, |
| _src[1][14].i8, |
| _src[1][15].i8, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p != src1.p) || (src0.o != src1.o) || (src0.n != src1.n) || (src0.m != src1.m) || (src0.l != src1.l) || (src0.k != src1.k) || (src0.j != src1.j) || (src0.i != src1.i) || (src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| const struct int16_vec src0 = { |
| _src[0][0].i16, |
| _src[0][1].i16, |
| _src[0][2].i16, |
| _src[0][3].i16, |
| _src[0][4].i16, |
| _src[0][5].i16, |
| _src[0][6].i16, |
| _src[0][7].i16, |
| _src[0][8].i16, |
| _src[0][9].i16, |
| _src[0][10].i16, |
| _src[0][11].i16, |
| _src[0][12].i16, |
| _src[0][13].i16, |
| _src[0][14].i16, |
| _src[0][15].i16, |
| }; |
| |
| const struct int16_vec src1 = { |
| _src[1][0].i16, |
| _src[1][1].i16, |
| _src[1][2].i16, |
| _src[1][3].i16, |
| _src[1][4].i16, |
| _src[1][5].i16, |
| _src[1][6].i16, |
| _src[1][7].i16, |
| _src[1][8].i16, |
| _src[1][9].i16, |
| _src[1][10].i16, |
| _src[1][11].i16, |
| _src[1][12].i16, |
| _src[1][13].i16, |
| _src[1][14].i16, |
| _src[1][15].i16, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p != src1.p) || (src0.o != src1.o) || (src0.n != src1.n) || (src0.m != src1.m) || (src0.l != src1.l) || (src0.k != src1.k) || (src0.j != src1.j) || (src0.i != src1.i) || (src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct int32_vec src0 = { |
| _src[0][0].i32, |
| _src[0][1].i32, |
| _src[0][2].i32, |
| _src[0][3].i32, |
| _src[0][4].i32, |
| _src[0][5].i32, |
| _src[0][6].i32, |
| _src[0][7].i32, |
| _src[0][8].i32, |
| _src[0][9].i32, |
| _src[0][10].i32, |
| _src[0][11].i32, |
| _src[0][12].i32, |
| _src[0][13].i32, |
| _src[0][14].i32, |
| _src[0][15].i32, |
| }; |
| |
| const struct int32_vec src1 = { |
| _src[1][0].i32, |
| _src[1][1].i32, |
| _src[1][2].i32, |
| _src[1][3].i32, |
| _src[1][4].i32, |
| _src[1][5].i32, |
| _src[1][6].i32, |
| _src[1][7].i32, |
| _src[1][8].i32, |
| _src[1][9].i32, |
| _src[1][10].i32, |
| _src[1][11].i32, |
| _src[1][12].i32, |
| _src[1][13].i32, |
| _src[1][14].i32, |
| _src[1][15].i32, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p != src1.p) || (src0.o != src1.o) || (src0.n != src1.n) || (src0.m != src1.m) || (src0.l != src1.l) || (src0.k != src1.k) || (src0.j != src1.j) || (src0.i != src1.i) || (src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct int64_vec src0 = { |
| _src[0][0].i64, |
| _src[0][1].i64, |
| _src[0][2].i64, |
| _src[0][3].i64, |
| _src[0][4].i64, |
| _src[0][5].i64, |
| _src[0][6].i64, |
| _src[0][7].i64, |
| _src[0][8].i64, |
| _src[0][9].i64, |
| _src[0][10].i64, |
| _src[0][11].i64, |
| _src[0][12].i64, |
| _src[0][13].i64, |
| _src[0][14].i64, |
| _src[0][15].i64, |
| }; |
| |
| const struct int64_vec src1 = { |
| _src[1][0].i64, |
| _src[1][1].i64, |
| _src[1][2].i64, |
| _src[1][3].i64, |
| _src[1][4].i64, |
| _src[1][5].i64, |
| _src[1][6].i64, |
| _src[1][7].i64, |
| _src[1][8].i64, |
| _src[1][9].i64, |
| _src[1][10].i64, |
| _src[1][11].i64, |
| _src[1][12].i64, |
| _src[1][13].i64, |
| _src[1][14].i64, |
| _src[1][15].i64, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p != src1.p) || (src0.o != src1.o) || (src0.n != src1.n) || (src0.m != src1.m) || (src0.l != src1.l) || (src0.k != src1.k) || (src0.j != src1.j) || (src0.i != src1.i) || (src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b8any_inequal2(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| const struct int1_vec src0 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][1].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int1_vec src1 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][1].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| const struct int8_vec src0 = { |
| _src[0][0].i8, |
| _src[0][1].i8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int8_vec src1 = { |
| _src[1][0].i8, |
| _src[1][1].i8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| const struct int16_vec src0 = { |
| _src[0][0].i16, |
| _src[0][1].i16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int16_vec src1 = { |
| _src[1][0].i16, |
| _src[1][1].i16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct int32_vec src0 = { |
| _src[0][0].i32, |
| _src[0][1].i32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int32_vec src1 = { |
| _src[1][0].i32, |
| _src[1][1].i32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct int64_vec src0 = { |
| _src[0][0].i64, |
| _src[0][1].i64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int64_vec src1 = { |
| _src[1][0].i64, |
| _src[1][1].i64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b8any_inequal3(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| const struct int1_vec src0 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][1].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][2].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int1_vec src1 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][1].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][2].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| const struct int8_vec src0 = { |
| _src[0][0].i8, |
| _src[0][1].i8, |
| _src[0][2].i8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int8_vec src1 = { |
| _src[1][0].i8, |
| _src[1][1].i8, |
| _src[1][2].i8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| const struct int16_vec src0 = { |
| _src[0][0].i16, |
| _src[0][1].i16, |
| _src[0][2].i16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int16_vec src1 = { |
| _src[1][0].i16, |
| _src[1][1].i16, |
| _src[1][2].i16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct int32_vec src0 = { |
| _src[0][0].i32, |
| _src[0][1].i32, |
| _src[0][2].i32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int32_vec src1 = { |
| _src[1][0].i32, |
| _src[1][1].i32, |
| _src[1][2].i32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct int64_vec src0 = { |
| _src[0][0].i64, |
| _src[0][1].i64, |
| _src[0][2].i64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int64_vec src1 = { |
| _src[1][0].i64, |
| _src[1][1].i64, |
| _src[1][2].i64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b8any_inequal4(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| const struct int1_vec src0 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][1].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][2].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][3].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int1_vec src1 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][1].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][2].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][3].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| const struct int8_vec src0 = { |
| _src[0][0].i8, |
| _src[0][1].i8, |
| _src[0][2].i8, |
| _src[0][3].i8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int8_vec src1 = { |
| _src[1][0].i8, |
| _src[1][1].i8, |
| _src[1][2].i8, |
| _src[1][3].i8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| const struct int16_vec src0 = { |
| _src[0][0].i16, |
| _src[0][1].i16, |
| _src[0][2].i16, |
| _src[0][3].i16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int16_vec src1 = { |
| _src[1][0].i16, |
| _src[1][1].i16, |
| _src[1][2].i16, |
| _src[1][3].i16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct int32_vec src0 = { |
| _src[0][0].i32, |
| _src[0][1].i32, |
| _src[0][2].i32, |
| _src[0][3].i32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int32_vec src1 = { |
| _src[1][0].i32, |
| _src[1][1].i32, |
| _src[1][2].i32, |
| _src[1][3].i32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct int64_vec src0 = { |
| _src[0][0].i64, |
| _src[0][1].i64, |
| _src[0][2].i64, |
| _src[0][3].i64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int64_vec src1 = { |
| _src[1][0].i64, |
| _src[1][1].i64, |
| _src[1][2].i64, |
| _src[1][3].i64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b8any_inequal8(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| const struct int1_vec src0 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][1].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][2].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][3].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][4].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][5].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][6].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][7].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int1_vec src1 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][1].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][2].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][3].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][4].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][5].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][6].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][7].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| const struct int8_vec src0 = { |
| _src[0][0].i8, |
| _src[0][1].i8, |
| _src[0][2].i8, |
| _src[0][3].i8, |
| _src[0][4].i8, |
| _src[0][5].i8, |
| _src[0][6].i8, |
| _src[0][7].i8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int8_vec src1 = { |
| _src[1][0].i8, |
| _src[1][1].i8, |
| _src[1][2].i8, |
| _src[1][3].i8, |
| _src[1][4].i8, |
| _src[1][5].i8, |
| _src[1][6].i8, |
| _src[1][7].i8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| const struct int16_vec src0 = { |
| _src[0][0].i16, |
| _src[0][1].i16, |
| _src[0][2].i16, |
| _src[0][3].i16, |
| _src[0][4].i16, |
| _src[0][5].i16, |
| _src[0][6].i16, |
| _src[0][7].i16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int16_vec src1 = { |
| _src[1][0].i16, |
| _src[1][1].i16, |
| _src[1][2].i16, |
| _src[1][3].i16, |
| _src[1][4].i16, |
| _src[1][5].i16, |
| _src[1][6].i16, |
| _src[1][7].i16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct int32_vec src0 = { |
| _src[0][0].i32, |
| _src[0][1].i32, |
| _src[0][2].i32, |
| _src[0][3].i32, |
| _src[0][4].i32, |
| _src[0][5].i32, |
| _src[0][6].i32, |
| _src[0][7].i32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int32_vec src1 = { |
| _src[1][0].i32, |
| _src[1][1].i32, |
| _src[1][2].i32, |
| _src[1][3].i32, |
| _src[1][4].i32, |
| _src[1][5].i32, |
| _src[1][6].i32, |
| _src[1][7].i32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct int64_vec src0 = { |
| _src[0][0].i64, |
| _src[0][1].i64, |
| _src[0][2].i64, |
| _src[0][3].i64, |
| _src[0][4].i64, |
| _src[0][5].i64, |
| _src[0][6].i64, |
| _src[0][7].i64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int64_vec src1 = { |
| _src[1][0].i64, |
| _src[1][1].i64, |
| _src[1][2].i64, |
| _src[1][3].i64, |
| _src[1][4].i64, |
| _src[1][5].i64, |
| _src[1][6].i64, |
| _src[1][7].i64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool8_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].i8 = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_b8csel(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool8_t src0 = |
| _src[0][_i].i8; |
| const uint1_t src1 = |
| _src[1][_i].b; |
| const uint1_t src2 = |
| _src[2][_i].b; |
| |
| uint1_t dst = src0 ? src1 : src2; |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[_i].b = dst & 1; |
| |
| } |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool8_t src0 = |
| _src[0][_i].i8; |
| const uint8_t src1 = |
| _src[1][_i].u8; |
| const uint8_t src2 = |
| _src[2][_i].u8; |
| |
| uint8_t dst = src0 ? src1 : src2; |
| |
| _dst_val[_i].u8 = dst; |
| |
| } |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool8_t src0 = |
| _src[0][_i].i8; |
| const uint16_t src1 = |
| _src[1][_i].u16; |
| const uint16_t src2 = |
| _src[2][_i].u16; |
| |
| uint16_t dst = src0 ? src1 : src2; |
| |
| _dst_val[_i].u16 = dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool8_t src0 = |
| _src[0][_i].i8; |
| const uint32_t src1 = |
| _src[1][_i].u32; |
| const uint32_t src2 = |
| _src[2][_i].u32; |
| |
| uint32_t dst = src0 ? src1 : src2; |
| |
| _dst_val[_i].u32 = dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool8_t src0 = |
| _src[0][_i].i8; |
| const uint64_t src1 = |
| _src[1][_i].u64; |
| const uint64_t src2 = |
| _src[2][_i].u64; |
| |
| uint64_t dst = src0 ? src1 : src2; |
| |
| _dst_val[_i].u64 = dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_ball_fequal16(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| const struct float16_vec src0 = { |
| _mesa_half_to_float(_src[0][0].u16), |
| _mesa_half_to_float(_src[0][1].u16), |
| _mesa_half_to_float(_src[0][2].u16), |
| _mesa_half_to_float(_src[0][3].u16), |
| _mesa_half_to_float(_src[0][4].u16), |
| _mesa_half_to_float(_src[0][5].u16), |
| _mesa_half_to_float(_src[0][6].u16), |
| _mesa_half_to_float(_src[0][7].u16), |
| _mesa_half_to_float(_src[0][8].u16), |
| _mesa_half_to_float(_src[0][9].u16), |
| _mesa_half_to_float(_src[0][10].u16), |
| _mesa_half_to_float(_src[0][11].u16), |
| _mesa_half_to_float(_src[0][12].u16), |
| _mesa_half_to_float(_src[0][13].u16), |
| _mesa_half_to_float(_src[0][14].u16), |
| _mesa_half_to_float(_src[0][15].u16), |
| }; |
| |
| const struct float16_vec src1 = { |
| _mesa_half_to_float(_src[1][0].u16), |
| _mesa_half_to_float(_src[1][1].u16), |
| _mesa_half_to_float(_src[1][2].u16), |
| _mesa_half_to_float(_src[1][3].u16), |
| _mesa_half_to_float(_src[1][4].u16), |
| _mesa_half_to_float(_src[1][5].u16), |
| _mesa_half_to_float(_src[1][6].u16), |
| _mesa_half_to_float(_src[1][7].u16), |
| _mesa_half_to_float(_src[1][8].u16), |
| _mesa_half_to_float(_src[1][9].u16), |
| _mesa_half_to_float(_src[1][10].u16), |
| _mesa_half_to_float(_src[1][11].u16), |
| _mesa_half_to_float(_src[1][12].u16), |
| _mesa_half_to_float(_src[1][13].u16), |
| _mesa_half_to_float(_src[1][14].u16), |
| _mesa_half_to_float(_src[1][15].u16), |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p == src1.p) && (src0.o == src1.o) && (src0.n == src1.n) && (src0.m == src1.m) && (src0.l == src1.l) && (src0.k == src1.k) && (src0.j == src1.j) && (src0.i == src1.i) && (src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| _src[0][2].f32, |
| _src[0][3].f32, |
| _src[0][4].f32, |
| _src[0][5].f32, |
| _src[0][6].f32, |
| _src[0][7].f32, |
| _src[0][8].f32, |
| _src[0][9].f32, |
| _src[0][10].f32, |
| _src[0][11].f32, |
| _src[0][12].f32, |
| _src[0][13].f32, |
| _src[0][14].f32, |
| _src[0][15].f32, |
| }; |
| |
| const struct float32_vec src1 = { |
| _src[1][0].f32, |
| _src[1][1].f32, |
| _src[1][2].f32, |
| _src[1][3].f32, |
| _src[1][4].f32, |
| _src[1][5].f32, |
| _src[1][6].f32, |
| _src[1][7].f32, |
| _src[1][8].f32, |
| _src[1][9].f32, |
| _src[1][10].f32, |
| _src[1][11].f32, |
| _src[1][12].f32, |
| _src[1][13].f32, |
| _src[1][14].f32, |
| _src[1][15].f32, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p == src1.p) && (src0.o == src1.o) && (src0.n == src1.n) && (src0.m == src1.m) && (src0.l == src1.l) && (src0.k == src1.k) && (src0.j == src1.j) && (src0.i == src1.i) && (src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct float64_vec src0 = { |
| _src[0][0].f64, |
| _src[0][1].f64, |
| _src[0][2].f64, |
| _src[0][3].f64, |
| _src[0][4].f64, |
| _src[0][5].f64, |
| _src[0][6].f64, |
| _src[0][7].f64, |
| _src[0][8].f64, |
| _src[0][9].f64, |
| _src[0][10].f64, |
| _src[0][11].f64, |
| _src[0][12].f64, |
| _src[0][13].f64, |
| _src[0][14].f64, |
| _src[0][15].f64, |
| }; |
| |
| const struct float64_vec src1 = { |
| _src[1][0].f64, |
| _src[1][1].f64, |
| _src[1][2].f64, |
| _src[1][3].f64, |
| _src[1][4].f64, |
| _src[1][5].f64, |
| _src[1][6].f64, |
| _src[1][7].f64, |
| _src[1][8].f64, |
| _src[1][9].f64, |
| _src[1][10].f64, |
| _src[1][11].f64, |
| _src[1][12].f64, |
| _src[1][13].f64, |
| _src[1][14].f64, |
| _src[1][15].f64, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p == src1.p) && (src0.o == src1.o) && (src0.n == src1.n) && (src0.m == src1.m) && (src0.l == src1.l) && (src0.k == src1.k) && (src0.j == src1.j) && (src0.i == src1.i) && (src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_ball_fequal2(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| const struct float16_vec src0 = { |
| _mesa_half_to_float(_src[0][0].u16), |
| _mesa_half_to_float(_src[0][1].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float16_vec src1 = { |
| _mesa_half_to_float(_src[1][0].u16), |
| _mesa_half_to_float(_src[1][1].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float32_vec src1 = { |
| _src[1][0].f32, |
| _src[1][1].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct float64_vec src0 = { |
| _src[0][0].f64, |
| _src[0][1].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float64_vec src1 = { |
| _src[1][0].f64, |
| _src[1][1].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_ball_fequal3(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| const struct float16_vec src0 = { |
| _mesa_half_to_float(_src[0][0].u16), |
| _mesa_half_to_float(_src[0][1].u16), |
| _mesa_half_to_float(_src[0][2].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float16_vec src1 = { |
| _mesa_half_to_float(_src[1][0].u16), |
| _mesa_half_to_float(_src[1][1].u16), |
| _mesa_half_to_float(_src[1][2].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| _src[0][2].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float32_vec src1 = { |
| _src[1][0].f32, |
| _src[1][1].f32, |
| _src[1][2].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct float64_vec src0 = { |
| _src[0][0].f64, |
| _src[0][1].f64, |
| _src[0][2].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float64_vec src1 = { |
| _src[1][0].f64, |
| _src[1][1].f64, |
| _src[1][2].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_ball_fequal4(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| const struct float16_vec src0 = { |
| _mesa_half_to_float(_src[0][0].u16), |
| _mesa_half_to_float(_src[0][1].u16), |
| _mesa_half_to_float(_src[0][2].u16), |
| _mesa_half_to_float(_src[0][3].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float16_vec src1 = { |
| _mesa_half_to_float(_src[1][0].u16), |
| _mesa_half_to_float(_src[1][1].u16), |
| _mesa_half_to_float(_src[1][2].u16), |
| _mesa_half_to_float(_src[1][3].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| _src[0][2].f32, |
| _src[0][3].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float32_vec src1 = { |
| _src[1][0].f32, |
| _src[1][1].f32, |
| _src[1][2].f32, |
| _src[1][3].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct float64_vec src0 = { |
| _src[0][0].f64, |
| _src[0][1].f64, |
| _src[0][2].f64, |
| _src[0][3].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float64_vec src1 = { |
| _src[1][0].f64, |
| _src[1][1].f64, |
| _src[1][2].f64, |
| _src[1][3].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_ball_fequal8(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| const struct float16_vec src0 = { |
| _mesa_half_to_float(_src[0][0].u16), |
| _mesa_half_to_float(_src[0][1].u16), |
| _mesa_half_to_float(_src[0][2].u16), |
| _mesa_half_to_float(_src[0][3].u16), |
| _mesa_half_to_float(_src[0][4].u16), |
| _mesa_half_to_float(_src[0][5].u16), |
| _mesa_half_to_float(_src[0][6].u16), |
| _mesa_half_to_float(_src[0][7].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float16_vec src1 = { |
| _mesa_half_to_float(_src[1][0].u16), |
| _mesa_half_to_float(_src[1][1].u16), |
| _mesa_half_to_float(_src[1][2].u16), |
| _mesa_half_to_float(_src[1][3].u16), |
| _mesa_half_to_float(_src[1][4].u16), |
| _mesa_half_to_float(_src[1][5].u16), |
| _mesa_half_to_float(_src[1][6].u16), |
| _mesa_half_to_float(_src[1][7].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| _src[0][2].f32, |
| _src[0][3].f32, |
| _src[0][4].f32, |
| _src[0][5].f32, |
| _src[0][6].f32, |
| _src[0][7].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float32_vec src1 = { |
| _src[1][0].f32, |
| _src[1][1].f32, |
| _src[1][2].f32, |
| _src[1][3].f32, |
| _src[1][4].f32, |
| _src[1][5].f32, |
| _src[1][6].f32, |
| _src[1][7].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct float64_vec src0 = { |
| _src[0][0].f64, |
| _src[0][1].f64, |
| _src[0][2].f64, |
| _src[0][3].f64, |
| _src[0][4].f64, |
| _src[0][5].f64, |
| _src[0][6].f64, |
| _src[0][7].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float64_vec src1 = { |
| _src[1][0].f64, |
| _src[1][1].f64, |
| _src[1][2].f64, |
| _src[1][3].f64, |
| _src[1][4].f64, |
| _src[1][5].f64, |
| _src[1][6].f64, |
| _src[1][7].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_ball_iequal16(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| const struct int1_vec src0 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][1].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][2].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][3].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][4].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][5].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][6].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][7].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][8].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][9].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][10].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][11].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][12].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][13].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][14].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][15].b, |
| }; |
| |
| const struct int1_vec src1 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][1].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][2].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][3].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][4].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][5].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][6].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][7].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][8].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][9].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][10].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][11].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][12].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][13].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][14].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][15].b, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p == src1.p) && (src0.o == src1.o) && (src0.n == src1.n) && (src0.m == src1.m) && (src0.l == src1.l) && (src0.k == src1.k) && (src0.j == src1.j) && (src0.i == src1.i) && (src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| const struct int8_vec src0 = { |
| _src[0][0].i8, |
| _src[0][1].i8, |
| _src[0][2].i8, |
| _src[0][3].i8, |
| _src[0][4].i8, |
| _src[0][5].i8, |
| _src[0][6].i8, |
| _src[0][7].i8, |
| _src[0][8].i8, |
| _src[0][9].i8, |
| _src[0][10].i8, |
| _src[0][11].i8, |
| _src[0][12].i8, |
| _src[0][13].i8, |
| _src[0][14].i8, |
| _src[0][15].i8, |
| }; |
| |
| const struct int8_vec src1 = { |
| _src[1][0].i8, |
| _src[1][1].i8, |
| _src[1][2].i8, |
| _src[1][3].i8, |
| _src[1][4].i8, |
| _src[1][5].i8, |
| _src[1][6].i8, |
| _src[1][7].i8, |
| _src[1][8].i8, |
| _src[1][9].i8, |
| _src[1][10].i8, |
| _src[1][11].i8, |
| _src[1][12].i8, |
| _src[1][13].i8, |
| _src[1][14].i8, |
| _src[1][15].i8, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p == src1.p) && (src0.o == src1.o) && (src0.n == src1.n) && (src0.m == src1.m) && (src0.l == src1.l) && (src0.k == src1.k) && (src0.j == src1.j) && (src0.i == src1.i) && (src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| const struct int16_vec src0 = { |
| _src[0][0].i16, |
| _src[0][1].i16, |
| _src[0][2].i16, |
| _src[0][3].i16, |
| _src[0][4].i16, |
| _src[0][5].i16, |
| _src[0][6].i16, |
| _src[0][7].i16, |
| _src[0][8].i16, |
| _src[0][9].i16, |
| _src[0][10].i16, |
| _src[0][11].i16, |
| _src[0][12].i16, |
| _src[0][13].i16, |
| _src[0][14].i16, |
| _src[0][15].i16, |
| }; |
| |
| const struct int16_vec src1 = { |
| _src[1][0].i16, |
| _src[1][1].i16, |
| _src[1][2].i16, |
| _src[1][3].i16, |
| _src[1][4].i16, |
| _src[1][5].i16, |
| _src[1][6].i16, |
| _src[1][7].i16, |
| _src[1][8].i16, |
| _src[1][9].i16, |
| _src[1][10].i16, |
| _src[1][11].i16, |
| _src[1][12].i16, |
| _src[1][13].i16, |
| _src[1][14].i16, |
| _src[1][15].i16, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p == src1.p) && (src0.o == src1.o) && (src0.n == src1.n) && (src0.m == src1.m) && (src0.l == src1.l) && (src0.k == src1.k) && (src0.j == src1.j) && (src0.i == src1.i) && (src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct int32_vec src0 = { |
| _src[0][0].i32, |
| _src[0][1].i32, |
| _src[0][2].i32, |
| _src[0][3].i32, |
| _src[0][4].i32, |
| _src[0][5].i32, |
| _src[0][6].i32, |
| _src[0][7].i32, |
| _src[0][8].i32, |
| _src[0][9].i32, |
| _src[0][10].i32, |
| _src[0][11].i32, |
| _src[0][12].i32, |
| _src[0][13].i32, |
| _src[0][14].i32, |
| _src[0][15].i32, |
| }; |
| |
| const struct int32_vec src1 = { |
| _src[1][0].i32, |
| _src[1][1].i32, |
| _src[1][2].i32, |
| _src[1][3].i32, |
| _src[1][4].i32, |
| _src[1][5].i32, |
| _src[1][6].i32, |
| _src[1][7].i32, |
| _src[1][8].i32, |
| _src[1][9].i32, |
| _src[1][10].i32, |
| _src[1][11].i32, |
| _src[1][12].i32, |
| _src[1][13].i32, |
| _src[1][14].i32, |
| _src[1][15].i32, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p == src1.p) && (src0.o == src1.o) && (src0.n == src1.n) && (src0.m == src1.m) && (src0.l == src1.l) && (src0.k == src1.k) && (src0.j == src1.j) && (src0.i == src1.i) && (src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct int64_vec src0 = { |
| _src[0][0].i64, |
| _src[0][1].i64, |
| _src[0][2].i64, |
| _src[0][3].i64, |
| _src[0][4].i64, |
| _src[0][5].i64, |
| _src[0][6].i64, |
| _src[0][7].i64, |
| _src[0][8].i64, |
| _src[0][9].i64, |
| _src[0][10].i64, |
| _src[0][11].i64, |
| _src[0][12].i64, |
| _src[0][13].i64, |
| _src[0][14].i64, |
| _src[0][15].i64, |
| }; |
| |
| const struct int64_vec src1 = { |
| _src[1][0].i64, |
| _src[1][1].i64, |
| _src[1][2].i64, |
| _src[1][3].i64, |
| _src[1][4].i64, |
| _src[1][5].i64, |
| _src[1][6].i64, |
| _src[1][7].i64, |
| _src[1][8].i64, |
| _src[1][9].i64, |
| _src[1][10].i64, |
| _src[1][11].i64, |
| _src[1][12].i64, |
| _src[1][13].i64, |
| _src[1][14].i64, |
| _src[1][15].i64, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p == src1.p) && (src0.o == src1.o) && (src0.n == src1.n) && (src0.m == src1.m) && (src0.l == src1.l) && (src0.k == src1.k) && (src0.j == src1.j) && (src0.i == src1.i) && (src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_ball_iequal2(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| const struct int1_vec src0 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][1].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int1_vec src1 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][1].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| const struct int8_vec src0 = { |
| _src[0][0].i8, |
| _src[0][1].i8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int8_vec src1 = { |
| _src[1][0].i8, |
| _src[1][1].i8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| const struct int16_vec src0 = { |
| _src[0][0].i16, |
| _src[0][1].i16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int16_vec src1 = { |
| _src[1][0].i16, |
| _src[1][1].i16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct int32_vec src0 = { |
| _src[0][0].i32, |
| _src[0][1].i32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int32_vec src1 = { |
| _src[1][0].i32, |
| _src[1][1].i32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct int64_vec src0 = { |
| _src[0][0].i64, |
| _src[0][1].i64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int64_vec src1 = { |
| _src[1][0].i64, |
| _src[1][1].i64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_ball_iequal3(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| const struct int1_vec src0 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][1].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][2].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int1_vec src1 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][1].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][2].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| const struct int8_vec src0 = { |
| _src[0][0].i8, |
| _src[0][1].i8, |
| _src[0][2].i8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int8_vec src1 = { |
| _src[1][0].i8, |
| _src[1][1].i8, |
| _src[1][2].i8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| const struct int16_vec src0 = { |
| _src[0][0].i16, |
| _src[0][1].i16, |
| _src[0][2].i16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int16_vec src1 = { |
| _src[1][0].i16, |
| _src[1][1].i16, |
| _src[1][2].i16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct int32_vec src0 = { |
| _src[0][0].i32, |
| _src[0][1].i32, |
| _src[0][2].i32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int32_vec src1 = { |
| _src[1][0].i32, |
| _src[1][1].i32, |
| _src[1][2].i32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct int64_vec src0 = { |
| _src[0][0].i64, |
| _src[0][1].i64, |
| _src[0][2].i64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int64_vec src1 = { |
| _src[1][0].i64, |
| _src[1][1].i64, |
| _src[1][2].i64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_ball_iequal4(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| const struct int1_vec src0 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][1].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][2].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][3].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int1_vec src1 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][1].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][2].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][3].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| const struct int8_vec src0 = { |
| _src[0][0].i8, |
| _src[0][1].i8, |
| _src[0][2].i8, |
| _src[0][3].i8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int8_vec src1 = { |
| _src[1][0].i8, |
| _src[1][1].i8, |
| _src[1][2].i8, |
| _src[1][3].i8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| const struct int16_vec src0 = { |
| _src[0][0].i16, |
| _src[0][1].i16, |
| _src[0][2].i16, |
| _src[0][3].i16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int16_vec src1 = { |
| _src[1][0].i16, |
| _src[1][1].i16, |
| _src[1][2].i16, |
| _src[1][3].i16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct int32_vec src0 = { |
| _src[0][0].i32, |
| _src[0][1].i32, |
| _src[0][2].i32, |
| _src[0][3].i32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int32_vec src1 = { |
| _src[1][0].i32, |
| _src[1][1].i32, |
| _src[1][2].i32, |
| _src[1][3].i32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct int64_vec src0 = { |
| _src[0][0].i64, |
| _src[0][1].i64, |
| _src[0][2].i64, |
| _src[0][3].i64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int64_vec src1 = { |
| _src[1][0].i64, |
| _src[1][1].i64, |
| _src[1][2].i64, |
| _src[1][3].i64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_ball_iequal8(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| const struct int1_vec src0 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][1].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][2].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][3].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][4].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][5].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][6].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][7].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int1_vec src1 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][1].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][2].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][3].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][4].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][5].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][6].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][7].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| const struct int8_vec src0 = { |
| _src[0][0].i8, |
| _src[0][1].i8, |
| _src[0][2].i8, |
| _src[0][3].i8, |
| _src[0][4].i8, |
| _src[0][5].i8, |
| _src[0][6].i8, |
| _src[0][7].i8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int8_vec src1 = { |
| _src[1][0].i8, |
| _src[1][1].i8, |
| _src[1][2].i8, |
| _src[1][3].i8, |
| _src[1][4].i8, |
| _src[1][5].i8, |
| _src[1][6].i8, |
| _src[1][7].i8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| const struct int16_vec src0 = { |
| _src[0][0].i16, |
| _src[0][1].i16, |
| _src[0][2].i16, |
| _src[0][3].i16, |
| _src[0][4].i16, |
| _src[0][5].i16, |
| _src[0][6].i16, |
| _src[0][7].i16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int16_vec src1 = { |
| _src[1][0].i16, |
| _src[1][1].i16, |
| _src[1][2].i16, |
| _src[1][3].i16, |
| _src[1][4].i16, |
| _src[1][5].i16, |
| _src[1][6].i16, |
| _src[1][7].i16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct int32_vec src0 = { |
| _src[0][0].i32, |
| _src[0][1].i32, |
| _src[0][2].i32, |
| _src[0][3].i32, |
| _src[0][4].i32, |
| _src[0][5].i32, |
| _src[0][6].i32, |
| _src[0][7].i32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int32_vec src1 = { |
| _src[1][0].i32, |
| _src[1][1].i32, |
| _src[1][2].i32, |
| _src[1][3].i32, |
| _src[1][4].i32, |
| _src[1][5].i32, |
| _src[1][6].i32, |
| _src[1][7].i32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct int64_vec src0 = { |
| _src[0][0].i64, |
| _src[0][1].i64, |
| _src[0][2].i64, |
| _src[0][3].i64, |
| _src[0][4].i64, |
| _src[0][5].i64, |
| _src[0][6].i64, |
| _src[0][7].i64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int64_vec src1 = { |
| _src[1][0].i64, |
| _src[1][1].i64, |
| _src[1][2].i64, |
| _src[1][3].i64, |
| _src[1][4].i64, |
| _src[1][5].i64, |
| _src[1][6].i64, |
| _src[1][7].i64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_bany_fnequal16(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| const struct float16_vec src0 = { |
| _mesa_half_to_float(_src[0][0].u16), |
| _mesa_half_to_float(_src[0][1].u16), |
| _mesa_half_to_float(_src[0][2].u16), |
| _mesa_half_to_float(_src[0][3].u16), |
| _mesa_half_to_float(_src[0][4].u16), |
| _mesa_half_to_float(_src[0][5].u16), |
| _mesa_half_to_float(_src[0][6].u16), |
| _mesa_half_to_float(_src[0][7].u16), |
| _mesa_half_to_float(_src[0][8].u16), |
| _mesa_half_to_float(_src[0][9].u16), |
| _mesa_half_to_float(_src[0][10].u16), |
| _mesa_half_to_float(_src[0][11].u16), |
| _mesa_half_to_float(_src[0][12].u16), |
| _mesa_half_to_float(_src[0][13].u16), |
| _mesa_half_to_float(_src[0][14].u16), |
| _mesa_half_to_float(_src[0][15].u16), |
| }; |
| |
| const struct float16_vec src1 = { |
| _mesa_half_to_float(_src[1][0].u16), |
| _mesa_half_to_float(_src[1][1].u16), |
| _mesa_half_to_float(_src[1][2].u16), |
| _mesa_half_to_float(_src[1][3].u16), |
| _mesa_half_to_float(_src[1][4].u16), |
| _mesa_half_to_float(_src[1][5].u16), |
| _mesa_half_to_float(_src[1][6].u16), |
| _mesa_half_to_float(_src[1][7].u16), |
| _mesa_half_to_float(_src[1][8].u16), |
| _mesa_half_to_float(_src[1][9].u16), |
| _mesa_half_to_float(_src[1][10].u16), |
| _mesa_half_to_float(_src[1][11].u16), |
| _mesa_half_to_float(_src[1][12].u16), |
| _mesa_half_to_float(_src[1][13].u16), |
| _mesa_half_to_float(_src[1][14].u16), |
| _mesa_half_to_float(_src[1][15].u16), |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p != src1.p) || (src0.o != src1.o) || (src0.n != src1.n) || (src0.m != src1.m) || (src0.l != src1.l) || (src0.k != src1.k) || (src0.j != src1.j) || (src0.i != src1.i) || (src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| _src[0][2].f32, |
| _src[0][3].f32, |
| _src[0][4].f32, |
| _src[0][5].f32, |
| _src[0][6].f32, |
| _src[0][7].f32, |
| _src[0][8].f32, |
| _src[0][9].f32, |
| _src[0][10].f32, |
| _src[0][11].f32, |
| _src[0][12].f32, |
| _src[0][13].f32, |
| _src[0][14].f32, |
| _src[0][15].f32, |
| }; |
| |
| const struct float32_vec src1 = { |
| _src[1][0].f32, |
| _src[1][1].f32, |
| _src[1][2].f32, |
| _src[1][3].f32, |
| _src[1][4].f32, |
| _src[1][5].f32, |
| _src[1][6].f32, |
| _src[1][7].f32, |
| _src[1][8].f32, |
| _src[1][9].f32, |
| _src[1][10].f32, |
| _src[1][11].f32, |
| _src[1][12].f32, |
| _src[1][13].f32, |
| _src[1][14].f32, |
| _src[1][15].f32, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p != src1.p) || (src0.o != src1.o) || (src0.n != src1.n) || (src0.m != src1.m) || (src0.l != src1.l) || (src0.k != src1.k) || (src0.j != src1.j) || (src0.i != src1.i) || (src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct float64_vec src0 = { |
| _src[0][0].f64, |
| _src[0][1].f64, |
| _src[0][2].f64, |
| _src[0][3].f64, |
| _src[0][4].f64, |
| _src[0][5].f64, |
| _src[0][6].f64, |
| _src[0][7].f64, |
| _src[0][8].f64, |
| _src[0][9].f64, |
| _src[0][10].f64, |
| _src[0][11].f64, |
| _src[0][12].f64, |
| _src[0][13].f64, |
| _src[0][14].f64, |
| _src[0][15].f64, |
| }; |
| |
| const struct float64_vec src1 = { |
| _src[1][0].f64, |
| _src[1][1].f64, |
| _src[1][2].f64, |
| _src[1][3].f64, |
| _src[1][4].f64, |
| _src[1][5].f64, |
| _src[1][6].f64, |
| _src[1][7].f64, |
| _src[1][8].f64, |
| _src[1][9].f64, |
| _src[1][10].f64, |
| _src[1][11].f64, |
| _src[1][12].f64, |
| _src[1][13].f64, |
| _src[1][14].f64, |
| _src[1][15].f64, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p != src1.p) || (src0.o != src1.o) || (src0.n != src1.n) || (src0.m != src1.m) || (src0.l != src1.l) || (src0.k != src1.k) || (src0.j != src1.j) || (src0.i != src1.i) || (src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_bany_fnequal2(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| const struct float16_vec src0 = { |
| _mesa_half_to_float(_src[0][0].u16), |
| _mesa_half_to_float(_src[0][1].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float16_vec src1 = { |
| _mesa_half_to_float(_src[1][0].u16), |
| _mesa_half_to_float(_src[1][1].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float32_vec src1 = { |
| _src[1][0].f32, |
| _src[1][1].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct float64_vec src0 = { |
| _src[0][0].f64, |
| _src[0][1].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float64_vec src1 = { |
| _src[1][0].f64, |
| _src[1][1].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_bany_fnequal3(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| const struct float16_vec src0 = { |
| _mesa_half_to_float(_src[0][0].u16), |
| _mesa_half_to_float(_src[0][1].u16), |
| _mesa_half_to_float(_src[0][2].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float16_vec src1 = { |
| _mesa_half_to_float(_src[1][0].u16), |
| _mesa_half_to_float(_src[1][1].u16), |
| _mesa_half_to_float(_src[1][2].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| _src[0][2].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float32_vec src1 = { |
| _src[1][0].f32, |
| _src[1][1].f32, |
| _src[1][2].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct float64_vec src0 = { |
| _src[0][0].f64, |
| _src[0][1].f64, |
| _src[0][2].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float64_vec src1 = { |
| _src[1][0].f64, |
| _src[1][1].f64, |
| _src[1][2].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_bany_fnequal4(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| const struct float16_vec src0 = { |
| _mesa_half_to_float(_src[0][0].u16), |
| _mesa_half_to_float(_src[0][1].u16), |
| _mesa_half_to_float(_src[0][2].u16), |
| _mesa_half_to_float(_src[0][3].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float16_vec src1 = { |
| _mesa_half_to_float(_src[1][0].u16), |
| _mesa_half_to_float(_src[1][1].u16), |
| _mesa_half_to_float(_src[1][2].u16), |
| _mesa_half_to_float(_src[1][3].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| _src[0][2].f32, |
| _src[0][3].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float32_vec src1 = { |
| _src[1][0].f32, |
| _src[1][1].f32, |
| _src[1][2].f32, |
| _src[1][3].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct float64_vec src0 = { |
| _src[0][0].f64, |
| _src[0][1].f64, |
| _src[0][2].f64, |
| _src[0][3].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float64_vec src1 = { |
| _src[1][0].f64, |
| _src[1][1].f64, |
| _src[1][2].f64, |
| _src[1][3].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_bany_fnequal8(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| const struct float16_vec src0 = { |
| _mesa_half_to_float(_src[0][0].u16), |
| _mesa_half_to_float(_src[0][1].u16), |
| _mesa_half_to_float(_src[0][2].u16), |
| _mesa_half_to_float(_src[0][3].u16), |
| _mesa_half_to_float(_src[0][4].u16), |
| _mesa_half_to_float(_src[0][5].u16), |
| _mesa_half_to_float(_src[0][6].u16), |
| _mesa_half_to_float(_src[0][7].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float16_vec src1 = { |
| _mesa_half_to_float(_src[1][0].u16), |
| _mesa_half_to_float(_src[1][1].u16), |
| _mesa_half_to_float(_src[1][2].u16), |
| _mesa_half_to_float(_src[1][3].u16), |
| _mesa_half_to_float(_src[1][4].u16), |
| _mesa_half_to_float(_src[1][5].u16), |
| _mesa_half_to_float(_src[1][6].u16), |
| _mesa_half_to_float(_src[1][7].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| _src[0][2].f32, |
| _src[0][3].f32, |
| _src[0][4].f32, |
| _src[0][5].f32, |
| _src[0][6].f32, |
| _src[0][7].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float32_vec src1 = { |
| _src[1][0].f32, |
| _src[1][1].f32, |
| _src[1][2].f32, |
| _src[1][3].f32, |
| _src[1][4].f32, |
| _src[1][5].f32, |
| _src[1][6].f32, |
| _src[1][7].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct float64_vec src0 = { |
| _src[0][0].f64, |
| _src[0][1].f64, |
| _src[0][2].f64, |
| _src[0][3].f64, |
| _src[0][4].f64, |
| _src[0][5].f64, |
| _src[0][6].f64, |
| _src[0][7].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float64_vec src1 = { |
| _src[1][0].f64, |
| _src[1][1].f64, |
| _src[1][2].f64, |
| _src[1][3].f64, |
| _src[1][4].f64, |
| _src[1][5].f64, |
| _src[1][6].f64, |
| _src[1][7].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_bany_inequal16(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| const struct int1_vec src0 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][1].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][2].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][3].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][4].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][5].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][6].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][7].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][8].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][9].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][10].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][11].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][12].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][13].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][14].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][15].b, |
| }; |
| |
| const struct int1_vec src1 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][1].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][2].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][3].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][4].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][5].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][6].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][7].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][8].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][9].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][10].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][11].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][12].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][13].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][14].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][15].b, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p != src1.p) || (src0.o != src1.o) || (src0.n != src1.n) || (src0.m != src1.m) || (src0.l != src1.l) || (src0.k != src1.k) || (src0.j != src1.j) || (src0.i != src1.i) || (src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| const struct int8_vec src0 = { |
| _src[0][0].i8, |
| _src[0][1].i8, |
| _src[0][2].i8, |
| _src[0][3].i8, |
| _src[0][4].i8, |
| _src[0][5].i8, |
| _src[0][6].i8, |
| _src[0][7].i8, |
| _src[0][8].i8, |
| _src[0][9].i8, |
| _src[0][10].i8, |
| _src[0][11].i8, |
| _src[0][12].i8, |
| _src[0][13].i8, |
| _src[0][14].i8, |
| _src[0][15].i8, |
| }; |
| |
| const struct int8_vec src1 = { |
| _src[1][0].i8, |
| _src[1][1].i8, |
| _src[1][2].i8, |
| _src[1][3].i8, |
| _src[1][4].i8, |
| _src[1][5].i8, |
| _src[1][6].i8, |
| _src[1][7].i8, |
| _src[1][8].i8, |
| _src[1][9].i8, |
| _src[1][10].i8, |
| _src[1][11].i8, |
| _src[1][12].i8, |
| _src[1][13].i8, |
| _src[1][14].i8, |
| _src[1][15].i8, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p != src1.p) || (src0.o != src1.o) || (src0.n != src1.n) || (src0.m != src1.m) || (src0.l != src1.l) || (src0.k != src1.k) || (src0.j != src1.j) || (src0.i != src1.i) || (src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| const struct int16_vec src0 = { |
| _src[0][0].i16, |
| _src[0][1].i16, |
| _src[0][2].i16, |
| _src[0][3].i16, |
| _src[0][4].i16, |
| _src[0][5].i16, |
| _src[0][6].i16, |
| _src[0][7].i16, |
| _src[0][8].i16, |
| _src[0][9].i16, |
| _src[0][10].i16, |
| _src[0][11].i16, |
| _src[0][12].i16, |
| _src[0][13].i16, |
| _src[0][14].i16, |
| _src[0][15].i16, |
| }; |
| |
| const struct int16_vec src1 = { |
| _src[1][0].i16, |
| _src[1][1].i16, |
| _src[1][2].i16, |
| _src[1][3].i16, |
| _src[1][4].i16, |
| _src[1][5].i16, |
| _src[1][6].i16, |
| _src[1][7].i16, |
| _src[1][8].i16, |
| _src[1][9].i16, |
| _src[1][10].i16, |
| _src[1][11].i16, |
| _src[1][12].i16, |
| _src[1][13].i16, |
| _src[1][14].i16, |
| _src[1][15].i16, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p != src1.p) || (src0.o != src1.o) || (src0.n != src1.n) || (src0.m != src1.m) || (src0.l != src1.l) || (src0.k != src1.k) || (src0.j != src1.j) || (src0.i != src1.i) || (src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct int32_vec src0 = { |
| _src[0][0].i32, |
| _src[0][1].i32, |
| _src[0][2].i32, |
| _src[0][3].i32, |
| _src[0][4].i32, |
| _src[0][5].i32, |
| _src[0][6].i32, |
| _src[0][7].i32, |
| _src[0][8].i32, |
| _src[0][9].i32, |
| _src[0][10].i32, |
| _src[0][11].i32, |
| _src[0][12].i32, |
| _src[0][13].i32, |
| _src[0][14].i32, |
| _src[0][15].i32, |
| }; |
| |
| const struct int32_vec src1 = { |
| _src[1][0].i32, |
| _src[1][1].i32, |
| _src[1][2].i32, |
| _src[1][3].i32, |
| _src[1][4].i32, |
| _src[1][5].i32, |
| _src[1][6].i32, |
| _src[1][7].i32, |
| _src[1][8].i32, |
| _src[1][9].i32, |
| _src[1][10].i32, |
| _src[1][11].i32, |
| _src[1][12].i32, |
| _src[1][13].i32, |
| _src[1][14].i32, |
| _src[1][15].i32, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p != src1.p) || (src0.o != src1.o) || (src0.n != src1.n) || (src0.m != src1.m) || (src0.l != src1.l) || (src0.k != src1.k) || (src0.j != src1.j) || (src0.i != src1.i) || (src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct int64_vec src0 = { |
| _src[0][0].i64, |
| _src[0][1].i64, |
| _src[0][2].i64, |
| _src[0][3].i64, |
| _src[0][4].i64, |
| _src[0][5].i64, |
| _src[0][6].i64, |
| _src[0][7].i64, |
| _src[0][8].i64, |
| _src[0][9].i64, |
| _src[0][10].i64, |
| _src[0][11].i64, |
| _src[0][12].i64, |
| _src[0][13].i64, |
| _src[0][14].i64, |
| _src[0][15].i64, |
| }; |
| |
| const struct int64_vec src1 = { |
| _src[1][0].i64, |
| _src[1][1].i64, |
| _src[1][2].i64, |
| _src[1][3].i64, |
| _src[1][4].i64, |
| _src[1][5].i64, |
| _src[1][6].i64, |
| _src[1][7].i64, |
| _src[1][8].i64, |
| _src[1][9].i64, |
| _src[1][10].i64, |
| _src[1][11].i64, |
| _src[1][12].i64, |
| _src[1][13].i64, |
| _src[1][14].i64, |
| _src[1][15].i64, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p != src1.p) || (src0.o != src1.o) || (src0.n != src1.n) || (src0.m != src1.m) || (src0.l != src1.l) || (src0.k != src1.k) || (src0.j != src1.j) || (src0.i != src1.i) || (src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_bany_inequal2(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| const struct int1_vec src0 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][1].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int1_vec src1 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][1].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| const struct int8_vec src0 = { |
| _src[0][0].i8, |
| _src[0][1].i8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int8_vec src1 = { |
| _src[1][0].i8, |
| _src[1][1].i8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| const struct int16_vec src0 = { |
| _src[0][0].i16, |
| _src[0][1].i16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int16_vec src1 = { |
| _src[1][0].i16, |
| _src[1][1].i16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct int32_vec src0 = { |
| _src[0][0].i32, |
| _src[0][1].i32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int32_vec src1 = { |
| _src[1][0].i32, |
| _src[1][1].i32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct int64_vec src0 = { |
| _src[0][0].i64, |
| _src[0][1].i64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int64_vec src1 = { |
| _src[1][0].i64, |
| _src[1][1].i64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_bany_inequal3(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| const struct int1_vec src0 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][1].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][2].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int1_vec src1 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][1].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][2].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| const struct int8_vec src0 = { |
| _src[0][0].i8, |
| _src[0][1].i8, |
| _src[0][2].i8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int8_vec src1 = { |
| _src[1][0].i8, |
| _src[1][1].i8, |
| _src[1][2].i8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| const struct int16_vec src0 = { |
| _src[0][0].i16, |
| _src[0][1].i16, |
| _src[0][2].i16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int16_vec src1 = { |
| _src[1][0].i16, |
| _src[1][1].i16, |
| _src[1][2].i16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct int32_vec src0 = { |
| _src[0][0].i32, |
| _src[0][1].i32, |
| _src[0][2].i32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int32_vec src1 = { |
| _src[1][0].i32, |
| _src[1][1].i32, |
| _src[1][2].i32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct int64_vec src0 = { |
| _src[0][0].i64, |
| _src[0][1].i64, |
| _src[0][2].i64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int64_vec src1 = { |
| _src[1][0].i64, |
| _src[1][1].i64, |
| _src[1][2].i64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_bany_inequal4(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| const struct int1_vec src0 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][1].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][2].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][3].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int1_vec src1 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][1].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][2].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][3].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| const struct int8_vec src0 = { |
| _src[0][0].i8, |
| _src[0][1].i8, |
| _src[0][2].i8, |
| _src[0][3].i8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int8_vec src1 = { |
| _src[1][0].i8, |
| _src[1][1].i8, |
| _src[1][2].i8, |
| _src[1][3].i8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| const struct int16_vec src0 = { |
| _src[0][0].i16, |
| _src[0][1].i16, |
| _src[0][2].i16, |
| _src[0][3].i16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int16_vec src1 = { |
| _src[1][0].i16, |
| _src[1][1].i16, |
| _src[1][2].i16, |
| _src[1][3].i16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct int32_vec src0 = { |
| _src[0][0].i32, |
| _src[0][1].i32, |
| _src[0][2].i32, |
| _src[0][3].i32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int32_vec src1 = { |
| _src[1][0].i32, |
| _src[1][1].i32, |
| _src[1][2].i32, |
| _src[1][3].i32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct int64_vec src0 = { |
| _src[0][0].i64, |
| _src[0][1].i64, |
| _src[0][2].i64, |
| _src[0][3].i64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int64_vec src1 = { |
| _src[1][0].i64, |
| _src[1][1].i64, |
| _src[1][2].i64, |
| _src[1][3].i64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_bany_inequal8(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| const struct int1_vec src0 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][1].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][2].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][3].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][4].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][5].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][6].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[0][7].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int1_vec src1 = { |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][0].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][1].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][2].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][3].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][4].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][5].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][6].b, |
| /* 1-bit integers use a 0/-1 convention */ |
| -(int1_t)_src[1][7].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| const struct int8_vec src0 = { |
| _src[0][0].i8, |
| _src[0][1].i8, |
| _src[0][2].i8, |
| _src[0][3].i8, |
| _src[0][4].i8, |
| _src[0][5].i8, |
| _src[0][6].i8, |
| _src[0][7].i8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int8_vec src1 = { |
| _src[1][0].i8, |
| _src[1][1].i8, |
| _src[1][2].i8, |
| _src[1][3].i8, |
| _src[1][4].i8, |
| _src[1][5].i8, |
| _src[1][6].i8, |
| _src[1][7].i8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| const struct int16_vec src0 = { |
| _src[0][0].i16, |
| _src[0][1].i16, |
| _src[0][2].i16, |
| _src[0][3].i16, |
| _src[0][4].i16, |
| _src[0][5].i16, |
| _src[0][6].i16, |
| _src[0][7].i16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int16_vec src1 = { |
| _src[1][0].i16, |
| _src[1][1].i16, |
| _src[1][2].i16, |
| _src[1][3].i16, |
| _src[1][4].i16, |
| _src[1][5].i16, |
| _src[1][6].i16, |
| _src[1][7].i16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct int32_vec src0 = { |
| _src[0][0].i32, |
| _src[0][1].i32, |
| _src[0][2].i32, |
| _src[0][3].i32, |
| _src[0][4].i32, |
| _src[0][5].i32, |
| _src[0][6].i32, |
| _src[0][7].i32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int32_vec src1 = { |
| _src[1][0].i32, |
| _src[1][1].i32, |
| _src[1][2].i32, |
| _src[1][3].i32, |
| _src[1][4].i32, |
| _src[1][5].i32, |
| _src[1][6].i32, |
| _src[1][7].i32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct int64_vec src0 = { |
| _src[0][0].i64, |
| _src[0][1].i64, |
| _src[0][2].i64, |
| _src[0][3].i64, |
| _src[0][4].i64, |
| _src[0][5].i64, |
| _src[0][6].i64, |
| _src[0][7].i64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct int64_vec src1 = { |
| _src[1][0].i64, |
| _src[1][1].i64, |
| _src[1][2].i64, |
| _src[1][3].i64, |
| _src[1][4].i64, |
| _src[1][5].i64, |
| _src[1][6].i64, |
| _src[1][7].i64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct bool1_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); |
| |
| _dst_val[0].b = -(int)dst.x; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_bcsel(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool1_t src0 = |
| _src[0][_i].b; |
| const uint1_t src1 = |
| _src[1][_i].b; |
| const uint1_t src2 = |
| _src[2][_i].b; |
| |
| uint1_t dst = src0 ? src1 : src2; |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[_i].b = dst & 1; |
| |
| } |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool1_t src0 = |
| _src[0][_i].b; |
| const uint8_t src1 = |
| _src[1][_i].u8; |
| const uint8_t src2 = |
| _src[2][_i].u8; |
| |
| uint8_t dst = src0 ? src1 : src2; |
| |
| _dst_val[_i].u8 = dst; |
| |
| } |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool1_t src0 = |
| _src[0][_i].b; |
| const uint16_t src1 = |
| _src[1][_i].u16; |
| const uint16_t src2 = |
| _src[2][_i].u16; |
| |
| uint16_t dst = src0 ? src1 : src2; |
| |
| _dst_val[_i].u16 = dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool1_t src0 = |
| _src[0][_i].b; |
| const uint32_t src1 = |
| _src[1][_i].u32; |
| const uint32_t src2 = |
| _src[2][_i].u32; |
| |
| uint32_t dst = src0 ? src1 : src2; |
| |
| _dst_val[_i].u32 = dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const bool1_t src0 = |
| _src[0][_i].b; |
| const uint64_t src1 = |
| _src[1][_i].u64; |
| const uint64_t src2 = |
| _src[2][_i].u64; |
| |
| uint64_t dst = src0 ? src1 : src2; |
| |
| _dst_val[_i].u64 = dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_bfi(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| UNUSED unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint32_t src0 = |
| _src[0][_i].u32; |
| const uint32_t src1 = |
| _src[1][_i].u32; |
| const uint32_t src2 = |
| _src[2][_i].u32; |
| |
| uint32_t dst; |
| |
| |
| unsigned mask = src0, insert = src1, base = src2; |
| if (mask == 0) { |
| dst = base; |
| } else { |
| unsigned tmp = mask; |
| while (!(tmp & 1)) { |
| tmp >>= 1; |
| insert <<= 1; |
| } |
| dst = (base & ~mask) | (insert & mask); |
| } |
| |
| |
| _dst_val[_i].u32 = dst; |
| |
| } |
| |
| } |
| static void |
| evaluate_bfm(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| UNUSED unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int32_t src0 = |
| _src[0][_i].i32; |
| const int32_t src1 = |
| _src[1][_i].i32; |
| |
| uint32_t dst; |
| |
| |
| int bits = src0 & 0x1F; |
| int offset = src1 & 0x1F; |
| dst = ((1u << bits) - 1) << offset; |
| |
| |
| _dst_val[_i].u32 = dst; |
| |
| } |
| |
| } |
| static void |
| evaluate_bit_count(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint1_t src0 = |
| _src[0][_i].b; |
| |
| uint32_t dst; |
| |
| |
| dst = 0; |
| for (unsigned bit = 0; bit < bit_size; bit++) { |
| if ((src0 >> bit) & 1) |
| dst++; |
| } |
| |
| |
| _dst_val[_i].u32 = dst; |
| |
| } |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint8_t src0 = |
| _src[0][_i].u8; |
| |
| uint32_t dst; |
| |
| |
| dst = 0; |
| for (unsigned bit = 0; bit < bit_size; bit++) { |
| if ((src0 >> bit) & 1) |
| dst++; |
| } |
| |
| |
| _dst_val[_i].u32 = dst; |
| |
| } |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint16_t src0 = |
| _src[0][_i].u16; |
| |
| uint32_t dst; |
| |
| |
| dst = 0; |
| for (unsigned bit = 0; bit < bit_size; bit++) { |
| if ((src0 >> bit) & 1) |
| dst++; |
| } |
| |
| |
| _dst_val[_i].u32 = dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint32_t src0 = |
| _src[0][_i].u32; |
| |
| uint32_t dst; |
| |
| |
| dst = 0; |
| for (unsigned bit = 0; bit < bit_size; bit++) { |
| if ((src0 >> bit) & 1) |
| dst++; |
| } |
| |
| |
| _dst_val[_i].u32 = dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint64_t src0 = |
| _src[0][_i].u64; |
| |
| uint32_t dst; |
| |
| |
| dst = 0; |
| for (unsigned bit = 0; bit < bit_size; bit++) { |
| if ((src0 >> bit) & 1) |
| dst++; |
| } |
| |
| |
| _dst_val[_i].u32 = dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_bitfield_insert(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| UNUSED unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint32_t src0 = |
| _src[0][_i].u32; |
| const uint32_t src1 = |
| _src[1][_i].u32; |
| const int32_t src2 = |
| _src[2][_i].i32; |
| const int32_t src3 = |
| _src[3][_i].i32; |
| |
| uint32_t dst; |
| |
| |
| unsigned base = src0, insert = src1; |
| int offset = src2, bits = src3; |
| if (bits == 0) { |
| dst = base; |
| } else if (offset < 0 || bits < 0 || bits + offset > 32) { |
| dst = 0; |
| } else { |
| unsigned mask = ((1ull << bits) - 1) << offset; |
| dst = (base & ~mask) | ((insert << offset) & mask); |
| } |
| |
| |
| _dst_val[_i].u32 = dst; |
| |
| } |
| |
| } |
| static void |
| evaluate_bitfield_reverse(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| UNUSED unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint32_t src0 = |
| _src[0][_i].u32; |
| |
| uint32_t dst; |
| |
| |
| /* we're not winning any awards for speed here, but that's ok */ |
| dst = 0; |
| for (unsigned bit = 0; bit < 32; bit++) |
| dst |= ((src0 >> bit) & 1) << (31 - bit); |
| |
| |
| _dst_val[_i].u32 = dst; |
| |
| } |
| |
| } |
| static void |
| evaluate_bitfield_select(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint1_t src0 = |
| _src[0][_i].b; |
| const uint1_t src1 = |
| _src[1][_i].b; |
| const uint1_t src2 = |
| _src[2][_i].b; |
| |
| uint1_t dst = (src0 & src1) | (~src0 & src2); |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[_i].b = dst & 1; |
| |
| } |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint8_t src0 = |
| _src[0][_i].u8; |
| const uint8_t src1 = |
| _src[1][_i].u8; |
| const uint8_t src2 = |
| _src[2][_i].u8; |
| |
| uint8_t dst = (src0 & src1) | (~src0 & src2); |
| |
| _dst_val[_i].u8 = dst; |
| |
| } |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint16_t src0 = |
| _src[0][_i].u16; |
| const uint16_t src1 = |
| _src[1][_i].u16; |
| const uint16_t src2 = |
| _src[2][_i].u16; |
| |
| uint16_t dst = (src0 & src1) | (~src0 & src2); |
| |
| _dst_val[_i].u16 = dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint32_t src0 = |
| _src[0][_i].u32; |
| const uint32_t src1 = |
| _src[1][_i].u32; |
| const uint32_t src2 = |
| _src[2][_i].u32; |
| |
| uint32_t dst = (src0 & src1) | (~src0 & src2); |
| |
| _dst_val[_i].u32 = dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint64_t src0 = |
| _src[0][_i].u64; |
| const uint64_t src1 = |
| _src[1][_i].u64; |
| const uint64_t src2 = |
| _src[2][_i].u64; |
| |
| uint64_t dst = (src0 & src1) | (~src0 & src2); |
| |
| _dst_val[_i].u64 = dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_cube_face_coord(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| UNUSED unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| _src[0][2].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct float32_vec dst; |
| |
| |
| dst.x = dst.y = 0.0; |
| float absX = fabsf(src0.x); |
| float absY = fabsf(src0.y); |
| float absZ = fabsf(src0.z); |
| |
| float ma = 0.0; |
| if (absX >= absY && absX >= absZ) { ma = 2 * src0.x; } |
| if (absY >= absX && absY >= absZ) { ma = 2 * src0.y; } |
| if (absZ >= absX && absZ >= absY) { ma = 2 * src0.z; } |
| |
| if (src0.x >= 0 && absX >= absY && absX >= absZ) { dst.x = -src0.z; dst.y = -src0.y; } |
| if (src0.x < 0 && absX >= absY && absX >= absZ) { dst.x = src0.z; dst.y = -src0.y; } |
| if (src0.y >= 0 && absY >= absX && absY >= absZ) { dst.x = src0.x; dst.y = src0.z; } |
| if (src0.y < 0 && absY >= absX && absY >= absZ) { dst.x = src0.x; dst.y = -src0.z; } |
| if (src0.z >= 0 && absZ >= absX && absZ >= absY) { dst.x = src0.x; dst.y = -src0.y; } |
| if (src0.z < 0 && absZ >= absX && absZ >= absY) { dst.x = -src0.x; dst.y = -src0.y; } |
| |
| dst.x = dst.x * (1.0f / ma) + 0.5f; |
| dst.y = dst.y * (1.0f / ma) + 0.5f; |
| |
| |
| _dst_val[0].f32 = dst.x; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[0], 32); |
| } |
| _dst_val[1].f32 = dst.y; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[1], 32); |
| } |
| |
| } |
| static void |
| evaluate_cube_face_index(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| UNUSED unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| _src[0][2].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct float32_vec dst; |
| |
| |
| float absX = fabsf(src0.x); |
| float absY = fabsf(src0.y); |
| float absZ = fabsf(src0.z); |
| if (src0.x >= 0 && absX >= absY && absX >= absZ) dst.x = 0; |
| if (src0.x < 0 && absX >= absY && absX >= absZ) dst.x = 1; |
| if (src0.y >= 0 && absY >= absX && absY >= absZ) dst.x = 2; |
| if (src0.y < 0 && absY >= absX && absY >= absZ) dst.x = 3; |
| if (src0.z >= 0 && absZ >= absX && absZ >= absY) dst.x = 4; |
| if (src0.z < 0 && absZ >= absX && absZ >= absY) dst.x = 5; |
| |
| |
| _dst_val[0].f32 = dst.x; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[0], 32); |
| } |
| |
| } |
| static void |
| evaluate_extract_i16(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| /* 1-bit integers use a 0/-1 convention */ |
| const int1_t src0 = -(int1_t)_src[0][_i].b; |
| /* 1-bit integers use a 0/-1 convention */ |
| const int1_t src1 = -(int1_t)_src[1][_i].b; |
| |
| int1_t dst = (int16_t)(src0 >> (src1 * 16)); |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[_i].b = dst & 1; |
| |
| } |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int8_t src0 = |
| _src[0][_i].i8; |
| const int8_t src1 = |
| _src[1][_i].i8; |
| |
| int8_t dst = (int16_t)(src0 >> (src1 * 16)); |
| |
| _dst_val[_i].i8 = dst; |
| |
| } |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int16_t src0 = |
| _src[0][_i].i16; |
| const int16_t src1 = |
| _src[1][_i].i16; |
| |
| int16_t dst = (int16_t)(src0 >> (src1 * 16)); |
| |
| _dst_val[_i].i16 = dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int32_t src0 = |
| _src[0][_i].i32; |
| const int32_t src1 = |
| _src[1][_i].i32; |
| |
| int32_t dst = (int16_t)(src0 >> (src1 * 16)); |
| |
| _dst_val[_i].i32 = dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int64_t src0 = |
| _src[0][_i].i64; |
| const int64_t src1 = |
| _src[1][_i].i64; |
| |
| int64_t dst = (int16_t)(src0 >> (src1 * 16)); |
| |
| _dst_val[_i].i64 = dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_extract_i8(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| /* 1-bit integers use a 0/-1 convention */ |
| const int1_t src0 = -(int1_t)_src[0][_i].b; |
| /* 1-bit integers use a 0/-1 convention */ |
| const int1_t src1 = -(int1_t)_src[1][_i].b; |
| |
| int1_t dst = (int8_t)(src0 >> (src1 * 8)); |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[_i].b = dst & 1; |
| |
| } |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int8_t src0 = |
| _src[0][_i].i8; |
| const int8_t src1 = |
| _src[1][_i].i8; |
| |
| int8_t dst = (int8_t)(src0 >> (src1 * 8)); |
| |
| _dst_val[_i].i8 = dst; |
| |
| } |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int16_t src0 = |
| _src[0][_i].i16; |
| const int16_t src1 = |
| _src[1][_i].i16; |
| |
| int16_t dst = (int8_t)(src0 >> (src1 * 8)); |
| |
| _dst_val[_i].i16 = dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int32_t src0 = |
| _src[0][_i].i32; |
| const int32_t src1 = |
| _src[1][_i].i32; |
| |
| int32_t dst = (int8_t)(src0 >> (src1 * 8)); |
| |
| _dst_val[_i].i32 = dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int64_t src0 = |
| _src[0][_i].i64; |
| const int64_t src1 = |
| _src[1][_i].i64; |
| |
| int64_t dst = (int8_t)(src0 >> (src1 * 8)); |
| |
| _dst_val[_i].i64 = dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_extract_u16(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint1_t src0 = |
| _src[0][_i].b; |
| const uint1_t src1 = |
| _src[1][_i].b; |
| |
| uint1_t dst = (uint16_t)(src0 >> (src1 * 16)); |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[_i].b = dst & 1; |
| |
| } |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint8_t src0 = |
| _src[0][_i].u8; |
| const uint8_t src1 = |
| _src[1][_i].u8; |
| |
| uint8_t dst = (uint16_t)(src0 >> (src1 * 16)); |
| |
| _dst_val[_i].u8 = dst; |
| |
| } |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint16_t src0 = |
| _src[0][_i].u16; |
| const uint16_t src1 = |
| _src[1][_i].u16; |
| |
| uint16_t dst = (uint16_t)(src0 >> (src1 * 16)); |
| |
| _dst_val[_i].u16 = dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint32_t src0 = |
| _src[0][_i].u32; |
| const uint32_t src1 = |
| _src[1][_i].u32; |
| |
| uint32_t dst = (uint16_t)(src0 >> (src1 * 16)); |
| |
| _dst_val[_i].u32 = dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint64_t src0 = |
| _src[0][_i].u64; |
| const uint64_t src1 = |
| _src[1][_i].u64; |
| |
| uint64_t dst = (uint16_t)(src0 >> (src1 * 16)); |
| |
| _dst_val[_i].u64 = dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_extract_u8(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint1_t src0 = |
| _src[0][_i].b; |
| const uint1_t src1 = |
| _src[1][_i].b; |
| |
| uint1_t dst = (uint8_t)(src0 >> (src1 * 8)); |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[_i].b = dst & 1; |
| |
| } |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint8_t src0 = |
| _src[0][_i].u8; |
| const uint8_t src1 = |
| _src[1][_i].u8; |
| |
| uint8_t dst = (uint8_t)(src0 >> (src1 * 8)); |
| |
| _dst_val[_i].u8 = dst; |
| |
| } |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint16_t src0 = |
| _src[0][_i].u16; |
| const uint16_t src1 = |
| _src[1][_i].u16; |
| |
| uint16_t dst = (uint8_t)(src0 >> (src1 * 8)); |
| |
| _dst_val[_i].u16 = dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint32_t src0 = |
| _src[0][_i].u32; |
| const uint32_t src1 = |
| _src[1][_i].u32; |
| |
| uint32_t dst = (uint8_t)(src0 >> (src1 * 8)); |
| |
| _dst_val[_i].u32 = dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint64_t src0 = |
| _src[0][_i].u64; |
| const uint64_t src1 = |
| _src[1][_i].u64; |
| |
| uint64_t dst = (uint8_t)(src0 >> (src1 * 8)); |
| |
| _dst_val[_i].u64 = dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_f2b1(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float src0 = |
| _mesa_half_to_float(_src[0][_i].u16); |
| |
| bool1_t dst = src0 != 0; |
| |
| _dst_val[_i].b = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| |
| bool1_t dst = src0 != 0; |
| |
| _dst_val[_i].b = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float64_t src0 = |
| _src[0][_i].f64; |
| |
| bool1_t dst = src0 != 0; |
| |
| _dst_val[_i].b = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_f2b16(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float src0 = |
| _mesa_half_to_float(_src[0][_i].u16); |
| |
| bool16_t dst = src0 != 0; |
| |
| _dst_val[_i].i16 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| |
| bool16_t dst = src0 != 0; |
| |
| _dst_val[_i].i16 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float64_t src0 = |
| _src[0][_i].f64; |
| |
| bool16_t dst = src0 != 0; |
| |
| _dst_val[_i].i16 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_f2b32(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float src0 = |
| _mesa_half_to_float(_src[0][_i].u16); |
| |
| bool32_t dst = src0 != 0; |
| |
| _dst_val[_i].i32 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| |
| bool32_t dst = src0 != 0; |
| |
| _dst_val[_i].i32 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float64_t src0 = |
| _src[0][_i].f64; |
| |
| bool32_t dst = src0 != 0; |
| |
| _dst_val[_i].i32 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_f2b8(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float src0 = |
| _mesa_half_to_float(_src[0][_i].u16); |
| |
| bool8_t dst = src0 != 0; |
| |
| _dst_val[_i].i8 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| |
| bool8_t dst = src0 != 0; |
| |
| _dst_val[_i].i8 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float64_t src0 = |
| _src[0][_i].f64; |
| |
| bool8_t dst = src0 != 0; |
| |
| _dst_val[_i].i8 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_f2f16(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float src0 = |
| _mesa_half_to_float(_src[0][_i].u16); |
| |
| float16_t dst = src0; |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); |
| } else { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 16); |
| } |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| |
| float16_t dst = src0; |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); |
| } else { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 16); |
| } |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float64_t src0 = |
| _src[0][_i].f64; |
| |
| float16_t dst = src0; |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); |
| } else { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 16); |
| } |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_f2f16_rtne(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float src0 = |
| _mesa_half_to_float(_src[0][_i].u16); |
| |
| float16_t dst; |
| |
| |
| if (bit_size > 16) { |
| dst = _mesa_half_to_float(_mesa_float_to_float16_rtne(src0)); |
| } else { |
| dst = src0; |
| } |
| |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); |
| } else { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 16); |
| } |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| |
| float16_t dst; |
| |
| |
| if (bit_size > 16) { |
| dst = _mesa_half_to_float(_mesa_float_to_float16_rtne(src0)); |
| } else { |
| dst = src0; |
| } |
| |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); |
| } else { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 16); |
| } |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float64_t src0 = |
| _src[0][_i].f64; |
| |
| float16_t dst; |
| |
| |
| if (bit_size > 16) { |
| dst = _mesa_half_to_float(_mesa_float_to_float16_rtne(src0)); |
| } else { |
| dst = src0; |
| } |
| |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); |
| } else { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 16); |
| } |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_f2f16_rtz(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float src0 = |
| _mesa_half_to_float(_src[0][_i].u16); |
| |
| float16_t dst; |
| |
| |
| if (bit_size > 16) { |
| dst = _mesa_half_to_float(_mesa_float_to_float16_rtz(src0)); |
| } else { |
| dst = src0; |
| } |
| |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); |
| } else { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 16); |
| } |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| |
| float16_t dst; |
| |
| |
| if (bit_size > 16) { |
| dst = _mesa_half_to_float(_mesa_float_to_float16_rtz(src0)); |
| } else { |
| dst = src0; |
| } |
| |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); |
| } else { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 16); |
| } |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float64_t src0 = |
| _src[0][_i].f64; |
| |
| float16_t dst; |
| |
| |
| if (bit_size > 16) { |
| dst = _mesa_half_to_float(_mesa_float_to_float16_rtz(src0)); |
| } else { |
| dst = src0; |
| } |
| |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); |
| } else { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 16); |
| } |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_f2f32(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float src0 = |
| _mesa_half_to_float(_src[0][_i].u16); |
| |
| float32_t dst; |
| |
| |
| if (bit_size > 32 && nir_is_rounding_mode_rtz(execution_mode, 32)) { |
| dst = _mesa_double_to_float_rtz(src0); |
| } else { |
| dst = src0; |
| } |
| |
| |
| _dst_val[_i].f32 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 32); |
| } |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| |
| float32_t dst; |
| |
| |
| if (bit_size > 32 && nir_is_rounding_mode_rtz(execution_mode, 32)) { |
| dst = _mesa_double_to_float_rtz(src0); |
| } else { |
| dst = src0; |
| } |
| |
| |
| _dst_val[_i].f32 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 32); |
| } |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float64_t src0 = |
| _src[0][_i].f64; |
| |
| float32_t dst; |
| |
| |
| if (bit_size > 32 && nir_is_rounding_mode_rtz(execution_mode, 32)) { |
| dst = _mesa_double_to_float_rtz(src0); |
| } else { |
| dst = src0; |
| } |
| |
| |
| _dst_val[_i].f32 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 32); |
| } |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_f2f64(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float src0 = |
| _mesa_half_to_float(_src[0][_i].u16); |
| |
| float64_t dst = src0; |
| |
| _dst_val[_i].f64 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 64); |
| } |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| |
| float64_t dst = src0; |
| |
| _dst_val[_i].f64 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 64); |
| } |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float64_t src0 = |
| _src[0][_i].f64; |
| |
| float64_t dst = src0; |
| |
| _dst_val[_i].f64 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 64); |
| } |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_f2fmp(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| UNUSED unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| |
| float16_t dst = src0; |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); |
| } else { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 16); |
| } |
| } |
| |
| } |
| static void |
| evaluate_f2i1(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float src0 = |
| _mesa_half_to_float(_src[0][_i].u16); |
| |
| int1_t dst = src0; |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[_i].b = dst & 1; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| |
| int1_t dst = src0; |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[_i].b = dst & 1; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float64_t src0 = |
| _src[0][_i].f64; |
| |
| int1_t dst = src0; |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[_i].b = dst & 1; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_f2i16(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float src0 = |
| _mesa_half_to_float(_src[0][_i].u16); |
| |
| int16_t dst = src0; |
| |
| _dst_val[_i].i16 = dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| |
| int16_t dst = src0; |
| |
| _dst_val[_i].i16 = dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float64_t src0 = |
| _src[0][_i].f64; |
| |
| int16_t dst = src0; |
| |
| _dst_val[_i].i16 = dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_f2i32(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float src0 = |
| _mesa_half_to_float(_src[0][_i].u16); |
| |
| int32_t dst = src0; |
| |
| _dst_val[_i].i32 = dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| |
| int32_t dst = src0; |
| |
| _dst_val[_i].i32 = dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float64_t src0 = |
| _src[0][_i].f64; |
| |
| int32_t dst = src0; |
| |
| _dst_val[_i].i32 = dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_f2i64(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float src0 = |
| _mesa_half_to_float(_src[0][_i].u16); |
| |
| int64_t dst = src0; |
| |
| _dst_val[_i].i64 = dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| |
| int64_t dst = src0; |
| |
| _dst_val[_i].i64 = dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float64_t src0 = |
| _src[0][_i].f64; |
| |
| int64_t dst = src0; |
| |
| _dst_val[_i].i64 = dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_f2i8(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float src0 = |
| _mesa_half_to_float(_src[0][_i].u16); |
| |
| int8_t dst = src0; |
| |
| _dst_val[_i].i8 = dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| |
| int8_t dst = src0; |
| |
| _dst_val[_i].i8 = dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float64_t src0 = |
| _src[0][_i].f64; |
| |
| int8_t dst = src0; |
| |
| _dst_val[_i].i8 = dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_f2imp(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| UNUSED unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| |
| int16_t dst = src0; |
| |
| _dst_val[_i].i16 = dst; |
| |
| } |
| |
| } |
| static void |
| evaluate_f2u1(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float src0 = |
| _mesa_half_to_float(_src[0][_i].u16); |
| |
| uint1_t dst = src0; |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[_i].b = dst & 1; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| |
| uint1_t dst = src0; |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[_i].b = dst & 1; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float64_t src0 = |
| _src[0][_i].f64; |
| |
| uint1_t dst = src0; |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[_i].b = dst & 1; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_f2u16(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float src0 = |
| _mesa_half_to_float(_src[0][_i].u16); |
| |
| uint16_t dst = src0; |
| |
| _dst_val[_i].u16 = dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| |
| uint16_t dst = src0; |
| |
| _dst_val[_i].u16 = dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float64_t src0 = |
| _src[0][_i].f64; |
| |
| uint16_t dst = src0; |
| |
| _dst_val[_i].u16 = dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_f2u32(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float src0 = |
| _mesa_half_to_float(_src[0][_i].u16); |
| |
| uint32_t dst = src0; |
| |
| _dst_val[_i].u32 = dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| |
| uint32_t dst = src0; |
| |
| _dst_val[_i].u32 = dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float64_t src0 = |
| _src[0][_i].f64; |
| |
| uint32_t dst = src0; |
| |
| _dst_val[_i].u32 = dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_f2u64(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float src0 = |
| _mesa_half_to_float(_src[0][_i].u16); |
| |
| uint64_t dst = src0; |
| |
| _dst_val[_i].u64 = dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| |
| uint64_t dst = src0; |
| |
| _dst_val[_i].u64 = dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float64_t src0 = |
| _src[0][_i].f64; |
| |
| uint64_t dst = src0; |
| |
| _dst_val[_i].u64 = dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_f2u8(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float src0 = |
| _mesa_half_to_float(_src[0][_i].u16); |
| |
| uint8_t dst = src0; |
| |
| _dst_val[_i].u8 = dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| |
| uint8_t dst = src0; |
| |
| _dst_val[_i].u8 = dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float64_t src0 = |
| _src[0][_i].f64; |
| |
| uint8_t dst = src0; |
| |
| _dst_val[_i].u8 = dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_f2ump(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| UNUSED unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| |
| uint16_t dst = src0; |
| |
| _dst_val[_i].u16 = dst; |
| |
| } |
| |
| } |
| static void |
| evaluate_fabs(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float src0 = |
| _mesa_half_to_float(_src[0][_i].u16); |
| |
| float16_t dst = fabs(src0); |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); |
| } else { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 16); |
| } |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| |
| float32_t dst = fabs(src0); |
| |
| _dst_val[_i].f32 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 32); |
| } |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float64_t src0 = |
| _src[0][_i].f64; |
| |
| float64_t dst = fabs(src0); |
| |
| _dst_val[_i].f64 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 64); |
| } |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_fadd(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float src0 = |
| _mesa_half_to_float(_src[0][_i].u16); |
| const float src1 = |
| _mesa_half_to_float(_src[1][_i].u16); |
| |
| float16_t dst; |
| |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, bit_size)) { |
| if (bit_size == 64) |
| dst = _mesa_double_add_rtz(src0, src1); |
| else |
| dst = _mesa_double_to_float_rtz((double)src0 + (double)src1); |
| } else { |
| dst = src0 + src1; |
| } |
| |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); |
| } else { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 16); |
| } |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| const float32_t src1 = |
| _src[1][_i].f32; |
| |
| float32_t dst; |
| |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, bit_size)) { |
| if (bit_size == 64) |
| dst = _mesa_double_add_rtz(src0, src1); |
| else |
| dst = _mesa_double_to_float_rtz((double)src0 + (double)src1); |
| } else { |
| dst = src0 + src1; |
| } |
| |
| |
| _dst_val[_i].f32 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 32); |
| } |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float64_t src0 = |
| _src[0][_i].f64; |
| const float64_t src1 = |
| _src[1][_i].f64; |
| |
| float64_t dst; |
| |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, bit_size)) { |
| if (bit_size == 64) |
| dst = _mesa_double_add_rtz(src0, src1); |
| else |
| dst = _mesa_double_to_float_rtz((double)src0 + (double)src1); |
| } else { |
| dst = src0 + src1; |
| } |
| |
| |
| _dst_val[_i].f64 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 64); |
| } |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_fall_equal16(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| UNUSED unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| _src[0][2].f32, |
| _src[0][3].f32, |
| _src[0][4].f32, |
| _src[0][5].f32, |
| _src[0][6].f32, |
| _src[0][7].f32, |
| _src[0][8].f32, |
| _src[0][9].f32, |
| _src[0][10].f32, |
| _src[0][11].f32, |
| _src[0][12].f32, |
| _src[0][13].f32, |
| _src[0][14].f32, |
| _src[0][15].f32, |
| }; |
| |
| const struct float32_vec src1 = { |
| _src[1][0].f32, |
| _src[1][1].f32, |
| _src[1][2].f32, |
| _src[1][3].f32, |
| _src[1][4].f32, |
| _src[1][5].f32, |
| _src[1][6].f32, |
| _src[1][7].f32, |
| _src[1][8].f32, |
| _src[1][9].f32, |
| _src[1][10].f32, |
| _src[1][11].f32, |
| _src[1][12].f32, |
| _src[1][13].f32, |
| _src[1][14].f32, |
| _src[1][15].f32, |
| }; |
| |
| struct float32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p == src1.p) && (src0.o == src1.o) && (src0.n == src1.n) && (src0.m == src1.m) && (src0.l == src1.l) && (src0.k == src1.k) && (src0.j == src1.j) && (src0.i == src1.i) && (src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)) ? 1.0f : 0.0f; |
| |
| _dst_val[0].f32 = dst.x; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[0], 32); |
| } |
| |
| } |
| static void |
| evaluate_fall_equal2(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| UNUSED unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float32_vec src1 = { |
| _src[1][0].f32, |
| _src[1][1].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct float32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y == src1.y) && (src0.x == src1.x)) ? 1.0f : 0.0f; |
| |
| _dst_val[0].f32 = dst.x; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[0], 32); |
| } |
| |
| } |
| static void |
| evaluate_fall_equal3(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| UNUSED unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| _src[0][2].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float32_vec src1 = { |
| _src[1][0].f32, |
| _src[1][1].f32, |
| _src[1][2].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct float32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)) ? 1.0f : 0.0f; |
| |
| _dst_val[0].f32 = dst.x; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[0], 32); |
| } |
| |
| } |
| static void |
| evaluate_fall_equal4(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| UNUSED unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| _src[0][2].f32, |
| _src[0][3].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float32_vec src1 = { |
| _src[1][0].f32, |
| _src[1][1].f32, |
| _src[1][2].f32, |
| _src[1][3].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct float32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)) ? 1.0f : 0.0f; |
| |
| _dst_val[0].f32 = dst.x; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[0], 32); |
| } |
| |
| } |
| static void |
| evaluate_fall_equal8(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| UNUSED unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| _src[0][2].f32, |
| _src[0][3].f32, |
| _src[0][4].f32, |
| _src[0][5].f32, |
| _src[0][6].f32, |
| _src[0][7].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float32_vec src1 = { |
| _src[1][0].f32, |
| _src[1][1].f32, |
| _src[1][2].f32, |
| _src[1][3].f32, |
| _src[1][4].f32, |
| _src[1][5].f32, |
| _src[1][6].f32, |
| _src[1][7].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct float32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)) ? 1.0f : 0.0f; |
| |
| _dst_val[0].f32 = dst.x; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[0], 32); |
| } |
| |
| } |
| static void |
| evaluate_fany_nequal16(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| UNUSED unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| _src[0][2].f32, |
| _src[0][3].f32, |
| _src[0][4].f32, |
| _src[0][5].f32, |
| _src[0][6].f32, |
| _src[0][7].f32, |
| _src[0][8].f32, |
| _src[0][9].f32, |
| _src[0][10].f32, |
| _src[0][11].f32, |
| _src[0][12].f32, |
| _src[0][13].f32, |
| _src[0][14].f32, |
| _src[0][15].f32, |
| }; |
| |
| const struct float32_vec src1 = { |
| _src[1][0].f32, |
| _src[1][1].f32, |
| _src[1][2].f32, |
| _src[1][3].f32, |
| _src[1][4].f32, |
| _src[1][5].f32, |
| _src[1][6].f32, |
| _src[1][7].f32, |
| _src[1][8].f32, |
| _src[1][9].f32, |
| _src[1][10].f32, |
| _src[1][11].f32, |
| _src[1][12].f32, |
| _src[1][13].f32, |
| _src[1][14].f32, |
| _src[1][15].f32, |
| }; |
| |
| struct float32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p != src1.p) || (src0.o != src1.o) || (src0.n != src1.n) || (src0.m != src1.m) || (src0.l != src1.l) || (src0.k != src1.k) || (src0.j != src1.j) || (src0.i != src1.i) || (src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)) ? 1.0f : 0.0f; |
| |
| _dst_val[0].f32 = dst.x; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[0], 32); |
| } |
| |
| } |
| static void |
| evaluate_fany_nequal2(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| UNUSED unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float32_vec src1 = { |
| _src[1][0].f32, |
| _src[1][1].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct float32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y != src1.y) || (src0.x != src1.x)) ? 1.0f : 0.0f; |
| |
| _dst_val[0].f32 = dst.x; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[0], 32); |
| } |
| |
| } |
| static void |
| evaluate_fany_nequal3(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| UNUSED unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| _src[0][2].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float32_vec src1 = { |
| _src[1][0].f32, |
| _src[1][1].f32, |
| _src[1][2].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct float32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)) ? 1.0f : 0.0f; |
| |
| _dst_val[0].f32 = dst.x; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[0], 32); |
| } |
| |
| } |
| static void |
| evaluate_fany_nequal4(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| UNUSED unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| _src[0][2].f32, |
| _src[0][3].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float32_vec src1 = { |
| _src[1][0].f32, |
| _src[1][1].f32, |
| _src[1][2].f32, |
| _src[1][3].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct float32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)) ? 1.0f : 0.0f; |
| |
| _dst_val[0].f32 = dst.x; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[0], 32); |
| } |
| |
| } |
| static void |
| evaluate_fany_nequal8(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| UNUSED unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| _src[0][2].f32, |
| _src[0][3].f32, |
| _src[0][4].f32, |
| _src[0][5].f32, |
| _src[0][6].f32, |
| _src[0][7].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float32_vec src1 = { |
| _src[1][0].f32, |
| _src[1][1].f32, |
| _src[1][2].f32, |
| _src[1][3].f32, |
| _src[1][4].f32, |
| _src[1][5].f32, |
| _src[1][6].f32, |
| _src[1][7].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct float32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)) ? 1.0f : 0.0f; |
| |
| _dst_val[0].f32 = dst.x; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[0], 32); |
| } |
| |
| } |
| static void |
| evaluate_fceil(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float src0 = |
| _mesa_half_to_float(_src[0][_i].u16); |
| |
| float16_t dst = bit_size == 64 ? ceil(src0) : ceilf(src0); |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); |
| } else { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 16); |
| } |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| |
| float32_t dst = bit_size == 64 ? ceil(src0) : ceilf(src0); |
| |
| _dst_val[_i].f32 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 32); |
| } |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float64_t src0 = |
| _src[0][_i].f64; |
| |
| float64_t dst = bit_size == 64 ? ceil(src0) : ceilf(src0); |
| |
| _dst_val[_i].f64 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 64); |
| } |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_fclamp_pos(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float src0 = |
| _mesa_half_to_float(_src[0][_i].u16); |
| |
| float16_t dst = fmax(src0, 0.0); |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); |
| } else { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 16); |
| } |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| |
| float32_t dst = fmax(src0, 0.0); |
| |
| _dst_val[_i].f32 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 32); |
| } |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float64_t src0 = |
| _src[0][_i].f64; |
| |
| float64_t dst = fmax(src0, 0.0); |
| |
| _dst_val[_i].f64 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 64); |
| } |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_fcos(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float src0 = |
| _mesa_half_to_float(_src[0][_i].u16); |
| |
| float16_t dst = bit_size == 64 ? cos(src0) : cosf(src0); |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); |
| } else { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 16); |
| } |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| |
| float32_t dst = bit_size == 64 ? cos(src0) : cosf(src0); |
| |
| _dst_val[_i].f32 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 32); |
| } |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float64_t src0 = |
| _src[0][_i].f64; |
| |
| float64_t dst = bit_size == 64 ? cos(src0) : cosf(src0); |
| |
| _dst_val[_i].f64 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 64); |
| } |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_fcsel(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| UNUSED unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| const float32_t src1 = |
| _src[1][_i].f32; |
| const float32_t src2 = |
| _src[2][_i].f32; |
| |
| float32_t dst = (src0 != 0.0f) ? src1 : src2; |
| |
| _dst_val[_i].f32 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 32); |
| } |
| } |
| |
| } |
| static void |
| evaluate_fddx(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| |
| float16_t dst = 0.0; |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); |
| } else { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 16); |
| } |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| |
| float32_t dst = 0.0; |
| |
| _dst_val[_i].f32 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 32); |
| } |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| |
| float64_t dst = 0.0; |
| |
| _dst_val[_i].f64 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 64); |
| } |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_fddx_coarse(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| |
| float16_t dst = 0.0; |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); |
| } else { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 16); |
| } |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| |
| float32_t dst = 0.0; |
| |
| _dst_val[_i].f32 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 32); |
| } |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| |
| float64_t dst = 0.0; |
| |
| _dst_val[_i].f64 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 64); |
| } |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_fddx_fine(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| |
| float16_t dst = 0.0; |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); |
| } else { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 16); |
| } |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| |
| float32_t dst = 0.0; |
| |
| _dst_val[_i].f32 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 32); |
| } |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| |
| float64_t dst = 0.0; |
| |
| _dst_val[_i].f64 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 64); |
| } |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_fddy(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| |
| float16_t dst = 0.0; |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); |
| } else { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 16); |
| } |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| |
| float32_t dst = 0.0; |
| |
| _dst_val[_i].f32 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 32); |
| } |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| |
| float64_t dst = 0.0; |
| |
| _dst_val[_i].f64 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 64); |
| } |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_fddy_coarse(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| |
| float16_t dst = 0.0; |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); |
| } else { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 16); |
| } |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| |
| float32_t dst = 0.0; |
| |
| _dst_val[_i].f32 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 32); |
| } |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| |
| float64_t dst = 0.0; |
| |
| _dst_val[_i].f64 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 64); |
| } |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_fddy_fine(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| |
| float16_t dst = 0.0; |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); |
| } else { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 16); |
| } |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| |
| float32_t dst = 0.0; |
| |
| _dst_val[_i].f32 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 32); |
| } |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| |
| float64_t dst = 0.0; |
| |
| _dst_val[_i].f64 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 64); |
| } |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_fdiv(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float src0 = |
| _mesa_half_to_float(_src[0][_i].u16); |
| const float src1 = |
| _mesa_half_to_float(_src[1][_i].u16); |
| |
| float16_t dst = src0 / src1; |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); |
| } else { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 16); |
| } |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| const float32_t src1 = |
| _src[1][_i].f32; |
| |
| float32_t dst = src0 / src1; |
| |
| _dst_val[_i].f32 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 32); |
| } |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float64_t src0 = |
| _src[0][_i].f64; |
| const float64_t src1 = |
| _src[1][_i].f64; |
| |
| float64_t dst = src0 / src1; |
| |
| _dst_val[_i].f64 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 64); |
| } |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_fdot16(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| const struct float16_vec src0 = { |
| _mesa_half_to_float(_src[0][0].u16), |
| _mesa_half_to_float(_src[0][1].u16), |
| _mesa_half_to_float(_src[0][2].u16), |
| _mesa_half_to_float(_src[0][3].u16), |
| _mesa_half_to_float(_src[0][4].u16), |
| _mesa_half_to_float(_src[0][5].u16), |
| _mesa_half_to_float(_src[0][6].u16), |
| _mesa_half_to_float(_src[0][7].u16), |
| _mesa_half_to_float(_src[0][8].u16), |
| _mesa_half_to_float(_src[0][9].u16), |
| _mesa_half_to_float(_src[0][10].u16), |
| _mesa_half_to_float(_src[0][11].u16), |
| _mesa_half_to_float(_src[0][12].u16), |
| _mesa_half_to_float(_src[0][13].u16), |
| _mesa_half_to_float(_src[0][14].u16), |
| _mesa_half_to_float(_src[0][15].u16), |
| }; |
| |
| const struct float16_vec src1 = { |
| _mesa_half_to_float(_src[1][0].u16), |
| _mesa_half_to_float(_src[1][1].u16), |
| _mesa_half_to_float(_src[1][2].u16), |
| _mesa_half_to_float(_src[1][3].u16), |
| _mesa_half_to_float(_src[1][4].u16), |
| _mesa_half_to_float(_src[1][5].u16), |
| _mesa_half_to_float(_src[1][6].u16), |
| _mesa_half_to_float(_src[1][7].u16), |
| _mesa_half_to_float(_src[1][8].u16), |
| _mesa_half_to_float(_src[1][9].u16), |
| _mesa_half_to_float(_src[1][10].u16), |
| _mesa_half_to_float(_src[1][11].u16), |
| _mesa_half_to_float(_src[1][12].u16), |
| _mesa_half_to_float(_src[1][13].u16), |
| _mesa_half_to_float(_src[1][14].u16), |
| _mesa_half_to_float(_src[1][15].u16), |
| }; |
| |
| struct float16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p * src1.p) + (src0.o * src1.o) + (src0.n * src1.n) + (src0.m * src1.m) + (src0.l * src1.l) + (src0.k * src1.k) + (src0.j * src1.j) + (src0.i * src1.i) + (src0.h * src1.h) + (src0.g * src1.g) + (src0.f * src1.f) + (src0.e * src1.e) + (src0.w * src1.w) + (src0.z * src1.z) + (src0.y * src1.y) + (src0.x * src1.x)); |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[0].u16 = _mesa_float_to_float16_rtz(dst.x); |
| } else { |
| _dst_val[0].u16 = _mesa_float_to_float16_rtne(dst.x); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[0], 16); |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| _src[0][2].f32, |
| _src[0][3].f32, |
| _src[0][4].f32, |
| _src[0][5].f32, |
| _src[0][6].f32, |
| _src[0][7].f32, |
| _src[0][8].f32, |
| _src[0][9].f32, |
| _src[0][10].f32, |
| _src[0][11].f32, |
| _src[0][12].f32, |
| _src[0][13].f32, |
| _src[0][14].f32, |
| _src[0][15].f32, |
| }; |
| |
| const struct float32_vec src1 = { |
| _src[1][0].f32, |
| _src[1][1].f32, |
| _src[1][2].f32, |
| _src[1][3].f32, |
| _src[1][4].f32, |
| _src[1][5].f32, |
| _src[1][6].f32, |
| _src[1][7].f32, |
| _src[1][8].f32, |
| _src[1][9].f32, |
| _src[1][10].f32, |
| _src[1][11].f32, |
| _src[1][12].f32, |
| _src[1][13].f32, |
| _src[1][14].f32, |
| _src[1][15].f32, |
| }; |
| |
| struct float32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p * src1.p) + (src0.o * src1.o) + (src0.n * src1.n) + (src0.m * src1.m) + (src0.l * src1.l) + (src0.k * src1.k) + (src0.j * src1.j) + (src0.i * src1.i) + (src0.h * src1.h) + (src0.g * src1.g) + (src0.f * src1.f) + (src0.e * src1.e) + (src0.w * src1.w) + (src0.z * src1.z) + (src0.y * src1.y) + (src0.x * src1.x)); |
| |
| _dst_val[0].f32 = dst.x; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[0], 32); |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct float64_vec src0 = { |
| _src[0][0].f64, |
| _src[0][1].f64, |
| _src[0][2].f64, |
| _src[0][3].f64, |
| _src[0][4].f64, |
| _src[0][5].f64, |
| _src[0][6].f64, |
| _src[0][7].f64, |
| _src[0][8].f64, |
| _src[0][9].f64, |
| _src[0][10].f64, |
| _src[0][11].f64, |
| _src[0][12].f64, |
| _src[0][13].f64, |
| _src[0][14].f64, |
| _src[0][15].f64, |
| }; |
| |
| const struct float64_vec src1 = { |
| _src[1][0].f64, |
| _src[1][1].f64, |
| _src[1][2].f64, |
| _src[1][3].f64, |
| _src[1][4].f64, |
| _src[1][5].f64, |
| _src[1][6].f64, |
| _src[1][7].f64, |
| _src[1][8].f64, |
| _src[1][9].f64, |
| _src[1][10].f64, |
| _src[1][11].f64, |
| _src[1][12].f64, |
| _src[1][13].f64, |
| _src[1][14].f64, |
| _src[1][15].f64, |
| }; |
| |
| struct float64_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p * src1.p) + (src0.o * src1.o) + (src0.n * src1.n) + (src0.m * src1.m) + (src0.l * src1.l) + (src0.k * src1.k) + (src0.j * src1.j) + (src0.i * src1.i) + (src0.h * src1.h) + (src0.g * src1.g) + (src0.f * src1.f) + (src0.e * src1.e) + (src0.w * src1.w) + (src0.z * src1.z) + (src0.y * src1.y) + (src0.x * src1.x)); |
| |
| _dst_val[0].f64 = dst.x; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[0], 64); |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_fdot16_replicated(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| const struct float16_vec src0 = { |
| _mesa_half_to_float(_src[0][0].u16), |
| _mesa_half_to_float(_src[0][1].u16), |
| _mesa_half_to_float(_src[0][2].u16), |
| _mesa_half_to_float(_src[0][3].u16), |
| _mesa_half_to_float(_src[0][4].u16), |
| _mesa_half_to_float(_src[0][5].u16), |
| _mesa_half_to_float(_src[0][6].u16), |
| _mesa_half_to_float(_src[0][7].u16), |
| _mesa_half_to_float(_src[0][8].u16), |
| _mesa_half_to_float(_src[0][9].u16), |
| _mesa_half_to_float(_src[0][10].u16), |
| _mesa_half_to_float(_src[0][11].u16), |
| _mesa_half_to_float(_src[0][12].u16), |
| _mesa_half_to_float(_src[0][13].u16), |
| _mesa_half_to_float(_src[0][14].u16), |
| _mesa_half_to_float(_src[0][15].u16), |
| }; |
| |
| const struct float16_vec src1 = { |
| _mesa_half_to_float(_src[1][0].u16), |
| _mesa_half_to_float(_src[1][1].u16), |
| _mesa_half_to_float(_src[1][2].u16), |
| _mesa_half_to_float(_src[1][3].u16), |
| _mesa_half_to_float(_src[1][4].u16), |
| _mesa_half_to_float(_src[1][5].u16), |
| _mesa_half_to_float(_src[1][6].u16), |
| _mesa_half_to_float(_src[1][7].u16), |
| _mesa_half_to_float(_src[1][8].u16), |
| _mesa_half_to_float(_src[1][9].u16), |
| _mesa_half_to_float(_src[1][10].u16), |
| _mesa_half_to_float(_src[1][11].u16), |
| _mesa_half_to_float(_src[1][12].u16), |
| _mesa_half_to_float(_src[1][13].u16), |
| _mesa_half_to_float(_src[1][14].u16), |
| _mesa_half_to_float(_src[1][15].u16), |
| }; |
| |
| struct float16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p * src1.p) + (src0.o * src1.o) + (src0.n * src1.n) + (src0.m * src1.m) + (src0.l * src1.l) + (src0.k * src1.k) + (src0.j * src1.j) + (src0.i * src1.i) + (src0.h * src1.h) + (src0.g * src1.g) + (src0.f * src1.f) + (src0.e * src1.e) + (src0.w * src1.w) + (src0.z * src1.z) + (src0.y * src1.y) + (src0.x * src1.x)); |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[0].u16 = _mesa_float_to_float16_rtz(dst.x); |
| } else { |
| _dst_val[0].u16 = _mesa_float_to_float16_rtne(dst.x); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[0], 16); |
| } |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[1].u16 = _mesa_float_to_float16_rtz(dst.y); |
| } else { |
| _dst_val[1].u16 = _mesa_float_to_float16_rtne(dst.y); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[1], 16); |
| } |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[2].u16 = _mesa_float_to_float16_rtz(dst.z); |
| } else { |
| _dst_val[2].u16 = _mesa_float_to_float16_rtne(dst.z); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[2], 16); |
| } |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[3].u16 = _mesa_float_to_float16_rtz(dst.w); |
| } else { |
| _dst_val[3].u16 = _mesa_float_to_float16_rtne(dst.w); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[3], 16); |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| _src[0][2].f32, |
| _src[0][3].f32, |
| _src[0][4].f32, |
| _src[0][5].f32, |
| _src[0][6].f32, |
| _src[0][7].f32, |
| _src[0][8].f32, |
| _src[0][9].f32, |
| _src[0][10].f32, |
| _src[0][11].f32, |
| _src[0][12].f32, |
| _src[0][13].f32, |
| _src[0][14].f32, |
| _src[0][15].f32, |
| }; |
| |
| const struct float32_vec src1 = { |
| _src[1][0].f32, |
| _src[1][1].f32, |
| _src[1][2].f32, |
| _src[1][3].f32, |
| _src[1][4].f32, |
| _src[1][5].f32, |
| _src[1][6].f32, |
| _src[1][7].f32, |
| _src[1][8].f32, |
| _src[1][9].f32, |
| _src[1][10].f32, |
| _src[1][11].f32, |
| _src[1][12].f32, |
| _src[1][13].f32, |
| _src[1][14].f32, |
| _src[1][15].f32, |
| }; |
| |
| struct float32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p * src1.p) + (src0.o * src1.o) + (src0.n * src1.n) + (src0.m * src1.m) + (src0.l * src1.l) + (src0.k * src1.k) + (src0.j * src1.j) + (src0.i * src1.i) + (src0.h * src1.h) + (src0.g * src1.g) + (src0.f * src1.f) + (src0.e * src1.e) + (src0.w * src1.w) + (src0.z * src1.z) + (src0.y * src1.y) + (src0.x * src1.x)); |
| |
| _dst_val[0].f32 = dst.x; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[0], 32); |
| } |
| _dst_val[1].f32 = dst.y; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[1], 32); |
| } |
| _dst_val[2].f32 = dst.z; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[2], 32); |
| } |
| _dst_val[3].f32 = dst.w; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[3], 32); |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct float64_vec src0 = { |
| _src[0][0].f64, |
| _src[0][1].f64, |
| _src[0][2].f64, |
| _src[0][3].f64, |
| _src[0][4].f64, |
| _src[0][5].f64, |
| _src[0][6].f64, |
| _src[0][7].f64, |
| _src[0][8].f64, |
| _src[0][9].f64, |
| _src[0][10].f64, |
| _src[0][11].f64, |
| _src[0][12].f64, |
| _src[0][13].f64, |
| _src[0][14].f64, |
| _src[0][15].f64, |
| }; |
| |
| const struct float64_vec src1 = { |
| _src[1][0].f64, |
| _src[1][1].f64, |
| _src[1][2].f64, |
| _src[1][3].f64, |
| _src[1][4].f64, |
| _src[1][5].f64, |
| _src[1][6].f64, |
| _src[1][7].f64, |
| _src[1][8].f64, |
| _src[1][9].f64, |
| _src[1][10].f64, |
| _src[1][11].f64, |
| _src[1][12].f64, |
| _src[1][13].f64, |
| _src[1][14].f64, |
| _src[1][15].f64, |
| }; |
| |
| struct float64_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.p * src1.p) + (src0.o * src1.o) + (src0.n * src1.n) + (src0.m * src1.m) + (src0.l * src1.l) + (src0.k * src1.k) + (src0.j * src1.j) + (src0.i * src1.i) + (src0.h * src1.h) + (src0.g * src1.g) + (src0.f * src1.f) + (src0.e * src1.e) + (src0.w * src1.w) + (src0.z * src1.z) + (src0.y * src1.y) + (src0.x * src1.x)); |
| |
| _dst_val[0].f64 = dst.x; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[0], 64); |
| } |
| _dst_val[1].f64 = dst.y; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[1], 64); |
| } |
| _dst_val[2].f64 = dst.z; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[2], 64); |
| } |
| _dst_val[3].f64 = dst.w; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[3], 64); |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_fdot2(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| const struct float16_vec src0 = { |
| _mesa_half_to_float(_src[0][0].u16), |
| _mesa_half_to_float(_src[0][1].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float16_vec src1 = { |
| _mesa_half_to_float(_src[1][0].u16), |
| _mesa_half_to_float(_src[1][1].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct float16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y * src1.y) + (src0.x * src1.x)); |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[0].u16 = _mesa_float_to_float16_rtz(dst.x); |
| } else { |
| _dst_val[0].u16 = _mesa_float_to_float16_rtne(dst.x); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[0], 16); |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float32_vec src1 = { |
| _src[1][0].f32, |
| _src[1][1].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct float32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y * src1.y) + (src0.x * src1.x)); |
| |
| _dst_val[0].f32 = dst.x; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[0], 32); |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct float64_vec src0 = { |
| _src[0][0].f64, |
| _src[0][1].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float64_vec src1 = { |
| _src[1][0].f64, |
| _src[1][1].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct float64_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y * src1.y) + (src0.x * src1.x)); |
| |
| _dst_val[0].f64 = dst.x; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[0], 64); |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_fdot2_replicated(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| const struct float16_vec src0 = { |
| _mesa_half_to_float(_src[0][0].u16), |
| _mesa_half_to_float(_src[0][1].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float16_vec src1 = { |
| _mesa_half_to_float(_src[1][0].u16), |
| _mesa_half_to_float(_src[1][1].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct float16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y * src1.y) + (src0.x * src1.x)); |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[0].u16 = _mesa_float_to_float16_rtz(dst.x); |
| } else { |
| _dst_val[0].u16 = _mesa_float_to_float16_rtne(dst.x); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[0], 16); |
| } |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[1].u16 = _mesa_float_to_float16_rtz(dst.y); |
| } else { |
| _dst_val[1].u16 = _mesa_float_to_float16_rtne(dst.y); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[1], 16); |
| } |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[2].u16 = _mesa_float_to_float16_rtz(dst.z); |
| } else { |
| _dst_val[2].u16 = _mesa_float_to_float16_rtne(dst.z); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[2], 16); |
| } |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[3].u16 = _mesa_float_to_float16_rtz(dst.w); |
| } else { |
| _dst_val[3].u16 = _mesa_float_to_float16_rtne(dst.w); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[3], 16); |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float32_vec src1 = { |
| _src[1][0].f32, |
| _src[1][1].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct float32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y * src1.y) + (src0.x * src1.x)); |
| |
| _dst_val[0].f32 = dst.x; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[0], 32); |
| } |
| _dst_val[1].f32 = dst.y; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[1], 32); |
| } |
| _dst_val[2].f32 = dst.z; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[2], 32); |
| } |
| _dst_val[3].f32 = dst.w; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[3], 32); |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct float64_vec src0 = { |
| _src[0][0].f64, |
| _src[0][1].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float64_vec src1 = { |
| _src[1][0].f64, |
| _src[1][1].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct float64_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.y * src1.y) + (src0.x * src1.x)); |
| |
| _dst_val[0].f64 = dst.x; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[0], 64); |
| } |
| _dst_val[1].f64 = dst.y; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[1], 64); |
| } |
| _dst_val[2].f64 = dst.z; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[2], 64); |
| } |
| _dst_val[3].f64 = dst.w; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[3], 64); |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_fdot3(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| const struct float16_vec src0 = { |
| _mesa_half_to_float(_src[0][0].u16), |
| _mesa_half_to_float(_src[0][1].u16), |
| _mesa_half_to_float(_src[0][2].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float16_vec src1 = { |
| _mesa_half_to_float(_src[1][0].u16), |
| _mesa_half_to_float(_src[1][1].u16), |
| _mesa_half_to_float(_src[1][2].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct float16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z * src1.z) + (src0.y * src1.y) + (src0.x * src1.x)); |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[0].u16 = _mesa_float_to_float16_rtz(dst.x); |
| } else { |
| _dst_val[0].u16 = _mesa_float_to_float16_rtne(dst.x); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[0], 16); |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| _src[0][2].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float32_vec src1 = { |
| _src[1][0].f32, |
| _src[1][1].f32, |
| _src[1][2].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct float32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z * src1.z) + (src0.y * src1.y) + (src0.x * src1.x)); |
| |
| _dst_val[0].f32 = dst.x; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[0], 32); |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct float64_vec src0 = { |
| _src[0][0].f64, |
| _src[0][1].f64, |
| _src[0][2].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float64_vec src1 = { |
| _src[1][0].f64, |
| _src[1][1].f64, |
| _src[1][2].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct float64_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z * src1.z) + (src0.y * src1.y) + (src0.x * src1.x)); |
| |
| _dst_val[0].f64 = dst.x; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[0], 64); |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_fdot3_replicated(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| const struct float16_vec src0 = { |
| _mesa_half_to_float(_src[0][0].u16), |
| _mesa_half_to_float(_src[0][1].u16), |
| _mesa_half_to_float(_src[0][2].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float16_vec src1 = { |
| _mesa_half_to_float(_src[1][0].u16), |
| _mesa_half_to_float(_src[1][1].u16), |
| _mesa_half_to_float(_src[1][2].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct float16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z * src1.z) + (src0.y * src1.y) + (src0.x * src1.x)); |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[0].u16 = _mesa_float_to_float16_rtz(dst.x); |
| } else { |
| _dst_val[0].u16 = _mesa_float_to_float16_rtne(dst.x); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[0], 16); |
| } |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[1].u16 = _mesa_float_to_float16_rtz(dst.y); |
| } else { |
| _dst_val[1].u16 = _mesa_float_to_float16_rtne(dst.y); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[1], 16); |
| } |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[2].u16 = _mesa_float_to_float16_rtz(dst.z); |
| } else { |
| _dst_val[2].u16 = _mesa_float_to_float16_rtne(dst.z); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[2], 16); |
| } |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[3].u16 = _mesa_float_to_float16_rtz(dst.w); |
| } else { |
| _dst_val[3].u16 = _mesa_float_to_float16_rtne(dst.w); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[3], 16); |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| _src[0][2].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float32_vec src1 = { |
| _src[1][0].f32, |
| _src[1][1].f32, |
| _src[1][2].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct float32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z * src1.z) + (src0.y * src1.y) + (src0.x * src1.x)); |
| |
| _dst_val[0].f32 = dst.x; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[0], 32); |
| } |
| _dst_val[1].f32 = dst.y; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[1], 32); |
| } |
| _dst_val[2].f32 = dst.z; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[2], 32); |
| } |
| _dst_val[3].f32 = dst.w; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[3], 32); |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct float64_vec src0 = { |
| _src[0][0].f64, |
| _src[0][1].f64, |
| _src[0][2].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float64_vec src1 = { |
| _src[1][0].f64, |
| _src[1][1].f64, |
| _src[1][2].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct float64_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.z * src1.z) + (src0.y * src1.y) + (src0.x * src1.x)); |
| |
| _dst_val[0].f64 = dst.x; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[0], 64); |
| } |
| _dst_val[1].f64 = dst.y; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[1], 64); |
| } |
| _dst_val[2].f64 = dst.z; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[2], 64); |
| } |
| _dst_val[3].f64 = dst.w; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[3], 64); |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_fdot4(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| const struct float16_vec src0 = { |
| _mesa_half_to_float(_src[0][0].u16), |
| _mesa_half_to_float(_src[0][1].u16), |
| _mesa_half_to_float(_src[0][2].u16), |
| _mesa_half_to_float(_src[0][3].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float16_vec src1 = { |
| _mesa_half_to_float(_src[1][0].u16), |
| _mesa_half_to_float(_src[1][1].u16), |
| _mesa_half_to_float(_src[1][2].u16), |
| _mesa_half_to_float(_src[1][3].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct float16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w * src1.w) + (src0.z * src1.z) + (src0.y * src1.y) + (src0.x * src1.x)); |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[0].u16 = _mesa_float_to_float16_rtz(dst.x); |
| } else { |
| _dst_val[0].u16 = _mesa_float_to_float16_rtne(dst.x); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[0], 16); |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| _src[0][2].f32, |
| _src[0][3].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float32_vec src1 = { |
| _src[1][0].f32, |
| _src[1][1].f32, |
| _src[1][2].f32, |
| _src[1][3].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct float32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w * src1.w) + (src0.z * src1.z) + (src0.y * src1.y) + (src0.x * src1.x)); |
| |
| _dst_val[0].f32 = dst.x; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[0], 32); |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct float64_vec src0 = { |
| _src[0][0].f64, |
| _src[0][1].f64, |
| _src[0][2].f64, |
| _src[0][3].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float64_vec src1 = { |
| _src[1][0].f64, |
| _src[1][1].f64, |
| _src[1][2].f64, |
| _src[1][3].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct float64_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w * src1.w) + (src0.z * src1.z) + (src0.y * src1.y) + (src0.x * src1.x)); |
| |
| _dst_val[0].f64 = dst.x; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[0], 64); |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_fdot4_replicated(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| const struct float16_vec src0 = { |
| _mesa_half_to_float(_src[0][0].u16), |
| _mesa_half_to_float(_src[0][1].u16), |
| _mesa_half_to_float(_src[0][2].u16), |
| _mesa_half_to_float(_src[0][3].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float16_vec src1 = { |
| _mesa_half_to_float(_src[1][0].u16), |
| _mesa_half_to_float(_src[1][1].u16), |
| _mesa_half_to_float(_src[1][2].u16), |
| _mesa_half_to_float(_src[1][3].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct float16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w * src1.w) + (src0.z * src1.z) + (src0.y * src1.y) + (src0.x * src1.x)); |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[0].u16 = _mesa_float_to_float16_rtz(dst.x); |
| } else { |
| _dst_val[0].u16 = _mesa_float_to_float16_rtne(dst.x); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[0], 16); |
| } |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[1].u16 = _mesa_float_to_float16_rtz(dst.y); |
| } else { |
| _dst_val[1].u16 = _mesa_float_to_float16_rtne(dst.y); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[1], 16); |
| } |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[2].u16 = _mesa_float_to_float16_rtz(dst.z); |
| } else { |
| _dst_val[2].u16 = _mesa_float_to_float16_rtne(dst.z); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[2], 16); |
| } |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[3].u16 = _mesa_float_to_float16_rtz(dst.w); |
| } else { |
| _dst_val[3].u16 = _mesa_float_to_float16_rtne(dst.w); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[3], 16); |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| _src[0][2].f32, |
| _src[0][3].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float32_vec src1 = { |
| _src[1][0].f32, |
| _src[1][1].f32, |
| _src[1][2].f32, |
| _src[1][3].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct float32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w * src1.w) + (src0.z * src1.z) + (src0.y * src1.y) + (src0.x * src1.x)); |
| |
| _dst_val[0].f32 = dst.x; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[0], 32); |
| } |
| _dst_val[1].f32 = dst.y; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[1], 32); |
| } |
| _dst_val[2].f32 = dst.z; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[2], 32); |
| } |
| _dst_val[3].f32 = dst.w; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[3], 32); |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct float64_vec src0 = { |
| _src[0][0].f64, |
| _src[0][1].f64, |
| _src[0][2].f64, |
| _src[0][3].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float64_vec src1 = { |
| _src[1][0].f64, |
| _src[1][1].f64, |
| _src[1][2].f64, |
| _src[1][3].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct float64_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.w * src1.w) + (src0.z * src1.z) + (src0.y * src1.y) + (src0.x * src1.x)); |
| |
| _dst_val[0].f64 = dst.x; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[0], 64); |
| } |
| _dst_val[1].f64 = dst.y; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[1], 64); |
| } |
| _dst_val[2].f64 = dst.z; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[2], 64); |
| } |
| _dst_val[3].f64 = dst.w; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[3], 64); |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_fdot8(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| const struct float16_vec src0 = { |
| _mesa_half_to_float(_src[0][0].u16), |
| _mesa_half_to_float(_src[0][1].u16), |
| _mesa_half_to_float(_src[0][2].u16), |
| _mesa_half_to_float(_src[0][3].u16), |
| _mesa_half_to_float(_src[0][4].u16), |
| _mesa_half_to_float(_src[0][5].u16), |
| _mesa_half_to_float(_src[0][6].u16), |
| _mesa_half_to_float(_src[0][7].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float16_vec src1 = { |
| _mesa_half_to_float(_src[1][0].u16), |
| _mesa_half_to_float(_src[1][1].u16), |
| _mesa_half_to_float(_src[1][2].u16), |
| _mesa_half_to_float(_src[1][3].u16), |
| _mesa_half_to_float(_src[1][4].u16), |
| _mesa_half_to_float(_src[1][5].u16), |
| _mesa_half_to_float(_src[1][6].u16), |
| _mesa_half_to_float(_src[1][7].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct float16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h * src1.h) + (src0.g * src1.g) + (src0.f * src1.f) + (src0.e * src1.e) + (src0.w * src1.w) + (src0.z * src1.z) + (src0.y * src1.y) + (src0.x * src1.x)); |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[0].u16 = _mesa_float_to_float16_rtz(dst.x); |
| } else { |
| _dst_val[0].u16 = _mesa_float_to_float16_rtne(dst.x); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[0], 16); |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| _src[0][2].f32, |
| _src[0][3].f32, |
| _src[0][4].f32, |
| _src[0][5].f32, |
| _src[0][6].f32, |
| _src[0][7].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float32_vec src1 = { |
| _src[1][0].f32, |
| _src[1][1].f32, |
| _src[1][2].f32, |
| _src[1][3].f32, |
| _src[1][4].f32, |
| _src[1][5].f32, |
| _src[1][6].f32, |
| _src[1][7].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct float32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h * src1.h) + (src0.g * src1.g) + (src0.f * src1.f) + (src0.e * src1.e) + (src0.w * src1.w) + (src0.z * src1.z) + (src0.y * src1.y) + (src0.x * src1.x)); |
| |
| _dst_val[0].f32 = dst.x; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[0], 32); |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct float64_vec src0 = { |
| _src[0][0].f64, |
| _src[0][1].f64, |
| _src[0][2].f64, |
| _src[0][3].f64, |
| _src[0][4].f64, |
| _src[0][5].f64, |
| _src[0][6].f64, |
| _src[0][7].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float64_vec src1 = { |
| _src[1][0].f64, |
| _src[1][1].f64, |
| _src[1][2].f64, |
| _src[1][3].f64, |
| _src[1][4].f64, |
| _src[1][5].f64, |
| _src[1][6].f64, |
| _src[1][7].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct float64_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h * src1.h) + (src0.g * src1.g) + (src0.f * src1.f) + (src0.e * src1.e) + (src0.w * src1.w) + (src0.z * src1.z) + (src0.y * src1.y) + (src0.x * src1.x)); |
| |
| _dst_val[0].f64 = dst.x; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[0], 64); |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_fdot8_replicated(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| const struct float16_vec src0 = { |
| _mesa_half_to_float(_src[0][0].u16), |
| _mesa_half_to_float(_src[0][1].u16), |
| _mesa_half_to_float(_src[0][2].u16), |
| _mesa_half_to_float(_src[0][3].u16), |
| _mesa_half_to_float(_src[0][4].u16), |
| _mesa_half_to_float(_src[0][5].u16), |
| _mesa_half_to_float(_src[0][6].u16), |
| _mesa_half_to_float(_src[0][7].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float16_vec src1 = { |
| _mesa_half_to_float(_src[1][0].u16), |
| _mesa_half_to_float(_src[1][1].u16), |
| _mesa_half_to_float(_src[1][2].u16), |
| _mesa_half_to_float(_src[1][3].u16), |
| _mesa_half_to_float(_src[1][4].u16), |
| _mesa_half_to_float(_src[1][5].u16), |
| _mesa_half_to_float(_src[1][6].u16), |
| _mesa_half_to_float(_src[1][7].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct float16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h * src1.h) + (src0.g * src1.g) + (src0.f * src1.f) + (src0.e * src1.e) + (src0.w * src1.w) + (src0.z * src1.z) + (src0.y * src1.y) + (src0.x * src1.x)); |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[0].u16 = _mesa_float_to_float16_rtz(dst.x); |
| } else { |
| _dst_val[0].u16 = _mesa_float_to_float16_rtne(dst.x); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[0], 16); |
| } |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[1].u16 = _mesa_float_to_float16_rtz(dst.y); |
| } else { |
| _dst_val[1].u16 = _mesa_float_to_float16_rtne(dst.y); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[1], 16); |
| } |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[2].u16 = _mesa_float_to_float16_rtz(dst.z); |
| } else { |
| _dst_val[2].u16 = _mesa_float_to_float16_rtne(dst.z); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[2], 16); |
| } |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[3].u16 = _mesa_float_to_float16_rtz(dst.w); |
| } else { |
| _dst_val[3].u16 = _mesa_float_to_float16_rtne(dst.w); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[3], 16); |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| _src[0][2].f32, |
| _src[0][3].f32, |
| _src[0][4].f32, |
| _src[0][5].f32, |
| _src[0][6].f32, |
| _src[0][7].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float32_vec src1 = { |
| _src[1][0].f32, |
| _src[1][1].f32, |
| _src[1][2].f32, |
| _src[1][3].f32, |
| _src[1][4].f32, |
| _src[1][5].f32, |
| _src[1][6].f32, |
| _src[1][7].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct float32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h * src1.h) + (src0.g * src1.g) + (src0.f * src1.f) + (src0.e * src1.e) + (src0.w * src1.w) + (src0.z * src1.z) + (src0.y * src1.y) + (src0.x * src1.x)); |
| |
| _dst_val[0].f32 = dst.x; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[0], 32); |
| } |
| _dst_val[1].f32 = dst.y; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[1], 32); |
| } |
| _dst_val[2].f32 = dst.z; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[2], 32); |
| } |
| _dst_val[3].f32 = dst.w; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[3], 32); |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct float64_vec src0 = { |
| _src[0][0].f64, |
| _src[0][1].f64, |
| _src[0][2].f64, |
| _src[0][3].f64, |
| _src[0][4].f64, |
| _src[0][5].f64, |
| _src[0][6].f64, |
| _src[0][7].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float64_vec src1 = { |
| _src[1][0].f64, |
| _src[1][1].f64, |
| _src[1][2].f64, |
| _src[1][3].f64, |
| _src[1][4].f64, |
| _src[1][5].f64, |
| _src[1][6].f64, |
| _src[1][7].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct float64_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = ((src0.h * src1.h) + (src0.g * src1.g) + (src0.f * src1.f) + (src0.e * src1.e) + (src0.w * src1.w) + (src0.z * src1.z) + (src0.y * src1.y) + (src0.x * src1.x)); |
| |
| _dst_val[0].f64 = dst.x; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[0], 64); |
| } |
| _dst_val[1].f64 = dst.y; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[1], 64); |
| } |
| _dst_val[2].f64 = dst.z; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[2], 64); |
| } |
| _dst_val[3].f64 = dst.w; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[3], 64); |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_fdph(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| const struct float16_vec src0 = { |
| _mesa_half_to_float(_src[0][0].u16), |
| _mesa_half_to_float(_src[0][1].u16), |
| _mesa_half_to_float(_src[0][2].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float16_vec src1 = { |
| _mesa_half_to_float(_src[1][0].u16), |
| _mesa_half_to_float(_src[1][1].u16), |
| _mesa_half_to_float(_src[1][2].u16), |
| _mesa_half_to_float(_src[1][3].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct float16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w; |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[0].u16 = _mesa_float_to_float16_rtz(dst.x); |
| } else { |
| _dst_val[0].u16 = _mesa_float_to_float16_rtne(dst.x); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[0], 16); |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| _src[0][2].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float32_vec src1 = { |
| _src[1][0].f32, |
| _src[1][1].f32, |
| _src[1][2].f32, |
| _src[1][3].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct float32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w; |
| |
| _dst_val[0].f32 = dst.x; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[0], 32); |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct float64_vec src0 = { |
| _src[0][0].f64, |
| _src[0][1].f64, |
| _src[0][2].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float64_vec src1 = { |
| _src[1][0].f64, |
| _src[1][1].f64, |
| _src[1][2].f64, |
| _src[1][3].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct float64_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w; |
| |
| _dst_val[0].f64 = dst.x; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[0], 64); |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_fdph_replicated(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| const struct float16_vec src0 = { |
| _mesa_half_to_float(_src[0][0].u16), |
| _mesa_half_to_float(_src[0][1].u16), |
| _mesa_half_to_float(_src[0][2].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float16_vec src1 = { |
| _mesa_half_to_float(_src[1][0].u16), |
| _mesa_half_to_float(_src[1][1].u16), |
| _mesa_half_to_float(_src[1][2].u16), |
| _mesa_half_to_float(_src[1][3].u16), |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct float16_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w; |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[0].u16 = _mesa_float_to_float16_rtz(dst.x); |
| } else { |
| _dst_val[0].u16 = _mesa_float_to_float16_rtne(dst.x); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[0], 16); |
| } |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[1].u16 = _mesa_float_to_float16_rtz(dst.y); |
| } else { |
| _dst_val[1].u16 = _mesa_float_to_float16_rtne(dst.y); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[1], 16); |
| } |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[2].u16 = _mesa_float_to_float16_rtz(dst.z); |
| } else { |
| _dst_val[2].u16 = _mesa_float_to_float16_rtne(dst.z); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[2], 16); |
| } |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[3].u16 = _mesa_float_to_float16_rtz(dst.w); |
| } else { |
| _dst_val[3].u16 = _mesa_float_to_float16_rtne(dst.w); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[3], 16); |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| _src[0][2].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float32_vec src1 = { |
| _src[1][0].f32, |
| _src[1][1].f32, |
| _src[1][2].f32, |
| _src[1][3].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct float32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w; |
| |
| _dst_val[0].f32 = dst.x; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[0], 32); |
| } |
| _dst_val[1].f32 = dst.y; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[1], 32); |
| } |
| _dst_val[2].f32 = dst.z; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[2], 32); |
| } |
| _dst_val[3].f32 = dst.w; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[3], 32); |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct float64_vec src0 = { |
| _src[0][0].f64, |
| _src[0][1].f64, |
| _src[0][2].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float64_vec src1 = { |
| _src[1][0].f64, |
| _src[1][1].f64, |
| _src[1][2].f64, |
| _src[1][3].f64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct float64_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w; |
| |
| _dst_val[0].f64 = dst.x; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[0], 64); |
| } |
| _dst_val[1].f64 = dst.y; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[1], 64); |
| } |
| _dst_val[2].f64 = dst.z; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[2], 64); |
| } |
| _dst_val[3].f64 = dst.w; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[3], 64); |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_feq(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float src0 = |
| _mesa_half_to_float(_src[0][_i].u16); |
| const float src1 = |
| _mesa_half_to_float(_src[1][_i].u16); |
| |
| bool1_t dst = src0 == src1; |
| |
| _dst_val[_i].b = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| const float32_t src1 = |
| _src[1][_i].f32; |
| |
| bool1_t dst = src0 == src1; |
| |
| _dst_val[_i].b = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float64_t src0 = |
| _src[0][_i].f64; |
| const float64_t src1 = |
| _src[1][_i].f64; |
| |
| bool1_t dst = src0 == src1; |
| |
| _dst_val[_i].b = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_feq16(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float src0 = |
| _mesa_half_to_float(_src[0][_i].u16); |
| const float src1 = |
| _mesa_half_to_float(_src[1][_i].u16); |
| |
| bool16_t dst = src0 == src1; |
| |
| _dst_val[_i].i16 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| const float32_t src1 = |
| _src[1][_i].f32; |
| |
| bool16_t dst = src0 == src1; |
| |
| _dst_val[_i].i16 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float64_t src0 = |
| _src[0][_i].f64; |
| const float64_t src1 = |
| _src[1][_i].f64; |
| |
| bool16_t dst = src0 == src1; |
| |
| _dst_val[_i].i16 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_feq32(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float src0 = |
| _mesa_half_to_float(_src[0][_i].u16); |
| const float src1 = |
| _mesa_half_to_float(_src[1][_i].u16); |
| |
| bool32_t dst = src0 == src1; |
| |
| _dst_val[_i].i32 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| const float32_t src1 = |
| _src[1][_i].f32; |
| |
| bool32_t dst = src0 == src1; |
| |
| _dst_val[_i].i32 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float64_t src0 = |
| _src[0][_i].f64; |
| const float64_t src1 = |
| _src[1][_i].f64; |
| |
| bool32_t dst = src0 == src1; |
| |
| _dst_val[_i].i32 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_feq8(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float src0 = |
| _mesa_half_to_float(_src[0][_i].u16); |
| const float src1 = |
| _mesa_half_to_float(_src[1][_i].u16); |
| |
| bool8_t dst = src0 == src1; |
| |
| _dst_val[_i].i8 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| const float32_t src1 = |
| _src[1][_i].f32; |
| |
| bool8_t dst = src0 == src1; |
| |
| _dst_val[_i].i8 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float64_t src0 = |
| _src[0][_i].f64; |
| const float64_t src1 = |
| _src[1][_i].f64; |
| |
| bool8_t dst = src0 == src1; |
| |
| _dst_val[_i].i8 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_fexp2(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float src0 = |
| _mesa_half_to_float(_src[0][_i].u16); |
| |
| float16_t dst = exp2f(src0); |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); |
| } else { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 16); |
| } |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| |
| float32_t dst = exp2f(src0); |
| |
| _dst_val[_i].f32 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 32); |
| } |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float64_t src0 = |
| _src[0][_i].f64; |
| |
| float64_t dst = exp2f(src0); |
| |
| _dst_val[_i].f64 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 64); |
| } |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_ffloor(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float src0 = |
| _mesa_half_to_float(_src[0][_i].u16); |
| |
| float16_t dst = bit_size == 64 ? floor(src0) : floorf(src0); |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); |
| } else { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 16); |
| } |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| |
| float32_t dst = bit_size == 64 ? floor(src0) : floorf(src0); |
| |
| _dst_val[_i].f32 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 32); |
| } |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float64_t src0 = |
| _src[0][_i].f64; |
| |
| float64_t dst = bit_size == 64 ? floor(src0) : floorf(src0); |
| |
| _dst_val[_i].f64 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 64); |
| } |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_ffma(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float src0 = |
| _mesa_half_to_float(_src[0][_i].u16); |
| const float src1 = |
| _mesa_half_to_float(_src[1][_i].u16); |
| const float src2 = |
| _mesa_half_to_float(_src[2][_i].u16); |
| |
| float16_t dst; |
| |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, bit_size)) { |
| if (bit_size == 64) |
| dst = _mesa_double_fma_rtz(src0, src1, src2); |
| else if (bit_size == 32) |
| dst = _mesa_float_fma_rtz(src0, src1, src2); |
| else |
| dst = _mesa_double_to_float_rtz(_mesa_double_fma_rtz(src0, src1, src2)); |
| } else { |
| if (bit_size == 32) |
| dst = fmaf(src0, src1, src2); |
| else |
| dst = fma(src0, src1, src2); |
| } |
| |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); |
| } else { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 16); |
| } |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| const float32_t src1 = |
| _src[1][_i].f32; |
| const float32_t src2 = |
| _src[2][_i].f32; |
| |
| float32_t dst; |
| |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, bit_size)) { |
| if (bit_size == 64) |
| dst = _mesa_double_fma_rtz(src0, src1, src2); |
| else if (bit_size == 32) |
| dst = _mesa_float_fma_rtz(src0, src1, src2); |
| else |
| dst = _mesa_double_to_float_rtz(_mesa_double_fma_rtz(src0, src1, src2)); |
| } else { |
| if (bit_size == 32) |
| dst = fmaf(src0, src1, src2); |
| else |
| dst = fma(src0, src1, src2); |
| } |
| |
| |
| _dst_val[_i].f32 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 32); |
| } |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float64_t src0 = |
| _src[0][_i].f64; |
| const float64_t src1 = |
| _src[1][_i].f64; |
| const float64_t src2 = |
| _src[2][_i].f64; |
| |
| float64_t dst; |
| |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, bit_size)) { |
| if (bit_size == 64) |
| dst = _mesa_double_fma_rtz(src0, src1, src2); |
| else if (bit_size == 32) |
| dst = _mesa_float_fma_rtz(src0, src1, src2); |
| else |
| dst = _mesa_double_to_float_rtz(_mesa_double_fma_rtz(src0, src1, src2)); |
| } else { |
| if (bit_size == 32) |
| dst = fmaf(src0, src1, src2); |
| else |
| dst = fma(src0, src1, src2); |
| } |
| |
| |
| _dst_val[_i].f64 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 64); |
| } |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_ffract(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float src0 = |
| _mesa_half_to_float(_src[0][_i].u16); |
| |
| float16_t dst = src0 - (bit_size == 64 ? floor(src0) : floorf(src0)); |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); |
| } else { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 16); |
| } |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| |
| float32_t dst = src0 - (bit_size == 64 ? floor(src0) : floorf(src0)); |
| |
| _dst_val[_i].f32 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 32); |
| } |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float64_t src0 = |
| _src[0][_i].f64; |
| |
| float64_t dst = src0 - (bit_size == 64 ? floor(src0) : floorf(src0)); |
| |
| _dst_val[_i].f64 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 64); |
| } |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_fge(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float src0 = |
| _mesa_half_to_float(_src[0][_i].u16); |
| const float src1 = |
| _mesa_half_to_float(_src[1][_i].u16); |
| |
| bool1_t dst = src0 >= src1; |
| |
| _dst_val[_i].b = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| const float32_t src1 = |
| _src[1][_i].f32; |
| |
| bool1_t dst = src0 >= src1; |
| |
| _dst_val[_i].b = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float64_t src0 = |
| _src[0][_i].f64; |
| const float64_t src1 = |
| _src[1][_i].f64; |
| |
| bool1_t dst = src0 >= src1; |
| |
| _dst_val[_i].b = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_fge16(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float src0 = |
| _mesa_half_to_float(_src[0][_i].u16); |
| const float src1 = |
| _mesa_half_to_float(_src[1][_i].u16); |
| |
| bool16_t dst = src0 >= src1; |
| |
| _dst_val[_i].i16 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| const float32_t src1 = |
| _src[1][_i].f32; |
| |
| bool16_t dst = src0 >= src1; |
| |
| _dst_val[_i].i16 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float64_t src0 = |
| _src[0][_i].f64; |
| const float64_t src1 = |
| _src[1][_i].f64; |
| |
| bool16_t dst = src0 >= src1; |
| |
| _dst_val[_i].i16 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_fge32(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float src0 = |
| _mesa_half_to_float(_src[0][_i].u16); |
| const float src1 = |
| _mesa_half_to_float(_src[1][_i].u16); |
| |
| bool32_t dst = src0 >= src1; |
| |
| _dst_val[_i].i32 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| const float32_t src1 = |
| _src[1][_i].f32; |
| |
| bool32_t dst = src0 >= src1; |
| |
| _dst_val[_i].i32 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float64_t src0 = |
| _src[0][_i].f64; |
| const float64_t src1 = |
| _src[1][_i].f64; |
| |
| bool32_t dst = src0 >= src1; |
| |
| _dst_val[_i].i32 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_fge8(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float src0 = |
| _mesa_half_to_float(_src[0][_i].u16); |
| const float src1 = |
| _mesa_half_to_float(_src[1][_i].u16); |
| |
| bool8_t dst = src0 >= src1; |
| |
| _dst_val[_i].i8 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| const float32_t src1 = |
| _src[1][_i].f32; |
| |
| bool8_t dst = src0 >= src1; |
| |
| _dst_val[_i].i8 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float64_t src0 = |
| _src[0][_i].f64; |
| const float64_t src1 = |
| _src[1][_i].f64; |
| |
| bool8_t dst = src0 >= src1; |
| |
| _dst_val[_i].i8 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_find_lsb(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| /* 1-bit integers use a 0/-1 convention */ |
| const int1_t src0 = -(int1_t)_src[0][_i].b; |
| |
| int32_t dst; |
| |
| |
| dst = -1; |
| for (unsigned bit = 0; bit < bit_size; bit++) { |
| if ((src0 >> bit) & 1) { |
| dst = bit; |
| break; |
| } |
| } |
| |
| |
| _dst_val[_i].i32 = dst; |
| |
| } |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int8_t src0 = |
| _src[0][_i].i8; |
| |
| int32_t dst; |
| |
| |
| dst = -1; |
| for (unsigned bit = 0; bit < bit_size; bit++) { |
| if ((src0 >> bit) & 1) { |
| dst = bit; |
| break; |
| } |
| } |
| |
| |
| _dst_val[_i].i32 = dst; |
| |
| } |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int16_t src0 = |
| _src[0][_i].i16; |
| |
| int32_t dst; |
| |
| |
| dst = -1; |
| for (unsigned bit = 0; bit < bit_size; bit++) { |
| if ((src0 >> bit) & 1) { |
| dst = bit; |
| break; |
| } |
| } |
| |
| |
| _dst_val[_i].i32 = dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int32_t src0 = |
| _src[0][_i].i32; |
| |
| int32_t dst; |
| |
| |
| dst = -1; |
| for (unsigned bit = 0; bit < bit_size; bit++) { |
| if ((src0 >> bit) & 1) { |
| dst = bit; |
| break; |
| } |
| } |
| |
| |
| _dst_val[_i].i32 = dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int64_t src0 = |
| _src[0][_i].i64; |
| |
| int32_t dst; |
| |
| |
| dst = -1; |
| for (unsigned bit = 0; bit < bit_size; bit++) { |
| if ((src0 >> bit) & 1) { |
| dst = bit; |
| break; |
| } |
| } |
| |
| |
| _dst_val[_i].i32 = dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_fisfinite(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float src0 = |
| _mesa_half_to_float(_src[0][_i].u16); |
| |
| bool1_t dst = isfinite(src0); |
| |
| _dst_val[_i].b = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| |
| bool1_t dst = isfinite(src0); |
| |
| _dst_val[_i].b = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float64_t src0 = |
| _src[0][_i].f64; |
| |
| bool1_t dst = isfinite(src0); |
| |
| _dst_val[_i].b = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_fisnormal(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float src0 = |
| _mesa_half_to_float(_src[0][_i].u16); |
| |
| bool1_t dst = isnormal(src0); |
| |
| _dst_val[_i].b = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| |
| bool1_t dst = isnormal(src0); |
| |
| _dst_val[_i].b = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float64_t src0 = |
| _src[0][_i].f64; |
| |
| bool1_t dst = isnormal(src0); |
| |
| _dst_val[_i].b = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_flog2(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float src0 = |
| _mesa_half_to_float(_src[0][_i].u16); |
| |
| float16_t dst = log2f(src0); |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); |
| } else { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 16); |
| } |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| |
| float32_t dst = log2f(src0); |
| |
| _dst_val[_i].f32 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 32); |
| } |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float64_t src0 = |
| _src[0][_i].f64; |
| |
| float64_t dst = log2f(src0); |
| |
| _dst_val[_i].f64 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 64); |
| } |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_flrp(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float src0 = |
| _mesa_half_to_float(_src[0][_i].u16); |
| const float src1 = |
| _mesa_half_to_float(_src[1][_i].u16); |
| const float src2 = |
| _mesa_half_to_float(_src[2][_i].u16); |
| |
| float16_t dst = src0 * (1 - src2) + src1 * src2; |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); |
| } else { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 16); |
| } |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| const float32_t src1 = |
| _src[1][_i].f32; |
| const float32_t src2 = |
| _src[2][_i].f32; |
| |
| float32_t dst = src0 * (1 - src2) + src1 * src2; |
| |
| _dst_val[_i].f32 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 32); |
| } |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float64_t src0 = |
| _src[0][_i].f64; |
| const float64_t src1 = |
| _src[1][_i].f64; |
| const float64_t src2 = |
| _src[2][_i].f64; |
| |
| float64_t dst = src0 * (1 - src2) + src1 * src2; |
| |
| _dst_val[_i].f64 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 64); |
| } |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_flt(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float src0 = |
| _mesa_half_to_float(_src[0][_i].u16); |
| const float src1 = |
| _mesa_half_to_float(_src[1][_i].u16); |
| |
| bool1_t dst = src0 < src1; |
| |
| _dst_val[_i].b = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| const float32_t src1 = |
| _src[1][_i].f32; |
| |
| bool1_t dst = src0 < src1; |
| |
| _dst_val[_i].b = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float64_t src0 = |
| _src[0][_i].f64; |
| const float64_t src1 = |
| _src[1][_i].f64; |
| |
| bool1_t dst = src0 < src1; |
| |
| _dst_val[_i].b = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_flt16(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float src0 = |
| _mesa_half_to_float(_src[0][_i].u16); |
| const float src1 = |
| _mesa_half_to_float(_src[1][_i].u16); |
| |
| bool16_t dst = src0 < src1; |
| |
| _dst_val[_i].i16 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| const float32_t src1 = |
| _src[1][_i].f32; |
| |
| bool16_t dst = src0 < src1; |
| |
| _dst_val[_i].i16 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float64_t src0 = |
| _src[0][_i].f64; |
| const float64_t src1 = |
| _src[1][_i].f64; |
| |
| bool16_t dst = src0 < src1; |
| |
| _dst_val[_i].i16 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_flt32(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float src0 = |
| _mesa_half_to_float(_src[0][_i].u16); |
| const float src1 = |
| _mesa_half_to_float(_src[1][_i].u16); |
| |
| bool32_t dst = src0 < src1; |
| |
| _dst_val[_i].i32 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| const float32_t src1 = |
| _src[1][_i].f32; |
| |
| bool32_t dst = src0 < src1; |
| |
| _dst_val[_i].i32 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float64_t src0 = |
| _src[0][_i].f64; |
| const float64_t src1 = |
| _src[1][_i].f64; |
| |
| bool32_t dst = src0 < src1; |
| |
| _dst_val[_i].i32 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_flt8(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float src0 = |
| _mesa_half_to_float(_src[0][_i].u16); |
| const float src1 = |
| _mesa_half_to_float(_src[1][_i].u16); |
| |
| bool8_t dst = src0 < src1; |
| |
| _dst_val[_i].i8 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| const float32_t src1 = |
| _src[1][_i].f32; |
| |
| bool8_t dst = src0 < src1; |
| |
| _dst_val[_i].i8 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float64_t src0 = |
| _src[0][_i].f64; |
| const float64_t src1 = |
| _src[1][_i].f64; |
| |
| bool8_t dst = src0 < src1; |
| |
| _dst_val[_i].i8 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_fmax(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float src0 = |
| _mesa_half_to_float(_src[0][_i].u16); |
| const float src1 = |
| _mesa_half_to_float(_src[1][_i].u16); |
| |
| float16_t dst = fmax(src0, src1); |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); |
| } else { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 16); |
| } |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| const float32_t src1 = |
| _src[1][_i].f32; |
| |
| float32_t dst = fmax(src0, src1); |
| |
| _dst_val[_i].f32 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 32); |
| } |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float64_t src0 = |
| _src[0][_i].f64; |
| const float64_t src1 = |
| _src[1][_i].f64; |
| |
| float64_t dst = fmax(src0, src1); |
| |
| _dst_val[_i].f64 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 64); |
| } |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_fmin(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float src0 = |
| _mesa_half_to_float(_src[0][_i].u16); |
| const float src1 = |
| _mesa_half_to_float(_src[1][_i].u16); |
| |
| float16_t dst = fmin(src0, src1); |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); |
| } else { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 16); |
| } |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| const float32_t src1 = |
| _src[1][_i].f32; |
| |
| float32_t dst = fmin(src0, src1); |
| |
| _dst_val[_i].f32 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 32); |
| } |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float64_t src0 = |
| _src[0][_i].f64; |
| const float64_t src1 = |
| _src[1][_i].f64; |
| |
| float64_t dst = fmin(src0, src1); |
| |
| _dst_val[_i].f64 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 64); |
| } |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_fmod(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float src0 = |
| _mesa_half_to_float(_src[0][_i].u16); |
| const float src1 = |
| _mesa_half_to_float(_src[1][_i].u16); |
| |
| float16_t dst = src0 - src1 * floorf(src0 / src1); |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); |
| } else { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 16); |
| } |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| const float32_t src1 = |
| _src[1][_i].f32; |
| |
| float32_t dst = src0 - src1 * floorf(src0 / src1); |
| |
| _dst_val[_i].f32 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 32); |
| } |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float64_t src0 = |
| _src[0][_i].f64; |
| const float64_t src1 = |
| _src[1][_i].f64; |
| |
| float64_t dst = src0 - src1 * floorf(src0 / src1); |
| |
| _dst_val[_i].f64 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 64); |
| } |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
/* Constant-fold nir_op_fmul component-wise.
 *
 * The RTZ (round-toward-zero) path first checks the mode at the actual
 * bit_size; the inner `bit_size == 64` test inside the 16- and 32-bit
 * cases is statically false there (the switch fixed bit_size), so those
 * cases always take the widen-to-double multiply + round-to-float branch.
 */
static void
evaluate_fmul(nir_const_value *_dst_val,
                 UNUSED unsigned num_components,
                 unsigned bit_size,
                 UNUSED nir_const_value **_src,
                 UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 16: {

      for (unsigned _i = 0; _i < num_components; _i++) {
            /* Half sources are widened to float for the arithmetic. */
            const float src0 =
               _mesa_half_to_float(_src[0][_i].u16);
            const float src1 =
               _mesa_half_to_float(_src[1][_i].u16);

            float16_t dst;


            if (nir_is_rounding_mode_rtz(execution_mode, bit_size)) {
               if (bit_size == 64)
                  dst = _mesa_double_mul_rtz(src0, src1);
               else
                  /* Exact double product, then rounded to float toward zero. */
                  dst = _mesa_double_to_float_rtz((double)src0 * (double)src1);
            } else {
               dst = src0 * src1;
            }


            /* Re-pack to half with the requested rounding mode. */
            if (nir_is_rounding_mode_rtz(execution_mode, 16)) {
               _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst);
            } else {
               _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst);
            }

            if (nir_is_denorm_flush_to_zero(execution_mode, 16)) {
               constant_denorm_flush_to_zero(&_dst_val[_i], 16);
            }
      }

      break;
   }
   case 32: {

      for (unsigned _i = 0; _i < num_components; _i++) {
            const float32_t src0 =
               _src[0][_i].f32;
            const float32_t src1 =
               _src[1][_i].f32;

            float32_t dst;


            if (nir_is_rounding_mode_rtz(execution_mode, bit_size)) {
               if (bit_size == 64)
                  dst = _mesa_double_mul_rtz(src0, src1);
               else
                  /* Exact double product, then rounded to float toward zero. */
                  dst = _mesa_double_to_float_rtz((double)src0 * (double)src1);
            } else {
               dst = src0 * src1;
            }


            _dst_val[_i].f32 = dst;

            if (nir_is_denorm_flush_to_zero(execution_mode, 32)) {
               constant_denorm_flush_to_zero(&_dst_val[_i], 32);
            }
      }

      break;
   }
   case 64: {

      for (unsigned _i = 0; _i < num_components; _i++) {
            const float64_t src0 =
               _src[0][_i].f64;
            const float64_t src1 =
               _src[1][_i].f64;

            float64_t dst;


            if (nir_is_rounding_mode_rtz(execution_mode, bit_size)) {
               if (bit_size == 64)
                  /* Software RTZ multiply; doubles cannot be widened further. */
                  dst = _mesa_double_mul_rtz(src0, src1);
               else
                  dst = _mesa_double_to_float_rtz((double)src0 * (double)src1);
            } else {
               dst = src0 * src1;
            }


            _dst_val[_i].f64 = dst;

            if (nir_is_denorm_flush_to_zero(execution_mode, 64)) {
               constant_denorm_flush_to_zero(&_dst_val[_i], 64);
            }
      }

      break;
   }

   default:
      unreachable("unknown bit width");
   }
}
| static void |
| evaluate_fneg(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float src0 = |
| _mesa_half_to_float(_src[0][_i].u16); |
| |
| float16_t dst = -src0; |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); |
| } else { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 16); |
| } |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| |
| float32_t dst = -src0; |
| |
| _dst_val[_i].f32 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 32); |
| } |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float64_t src0 = |
| _src[0][_i].f64; |
| |
| float64_t dst = -src0; |
| |
| _dst_val[_i].f64 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 64); |
| } |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_fneu(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float src0 = |
| _mesa_half_to_float(_src[0][_i].u16); |
| const float src1 = |
| _mesa_half_to_float(_src[1][_i].u16); |
| |
| bool1_t dst = src0 != src1; |
| |
| _dst_val[_i].b = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| const float32_t src1 = |
| _src[1][_i].f32; |
| |
| bool1_t dst = src0 != src1; |
| |
| _dst_val[_i].b = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float64_t src0 = |
| _src[0][_i].f64; |
| const float64_t src1 = |
| _src[1][_i].f64; |
| |
| bool1_t dst = src0 != src1; |
| |
| _dst_val[_i].b = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_fneu16(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float src0 = |
| _mesa_half_to_float(_src[0][_i].u16); |
| const float src1 = |
| _mesa_half_to_float(_src[1][_i].u16); |
| |
| bool16_t dst = src0 != src1; |
| |
| _dst_val[_i].i16 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| const float32_t src1 = |
| _src[1][_i].f32; |
| |
| bool16_t dst = src0 != src1; |
| |
| _dst_val[_i].i16 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float64_t src0 = |
| _src[0][_i].f64; |
| const float64_t src1 = |
| _src[1][_i].f64; |
| |
| bool16_t dst = src0 != src1; |
| |
| _dst_val[_i].i16 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_fneu32(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float src0 = |
| _mesa_half_to_float(_src[0][_i].u16); |
| const float src1 = |
| _mesa_half_to_float(_src[1][_i].u16); |
| |
| bool32_t dst = src0 != src1; |
| |
| _dst_val[_i].i32 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| const float32_t src1 = |
| _src[1][_i].f32; |
| |
| bool32_t dst = src0 != src1; |
| |
| _dst_val[_i].i32 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float64_t src0 = |
| _src[0][_i].f64; |
| const float64_t src1 = |
| _src[1][_i].f64; |
| |
| bool32_t dst = src0 != src1; |
| |
| _dst_val[_i].i32 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_fneu8(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float src0 = |
| _mesa_half_to_float(_src[0][_i].u16); |
| const float src1 = |
| _mesa_half_to_float(_src[1][_i].u16); |
| |
| bool8_t dst = src0 != src1; |
| |
| _dst_val[_i].i8 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| const float32_t src1 = |
| _src[1][_i].f32; |
| |
| bool8_t dst = src0 != src1; |
| |
| _dst_val[_i].i8 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float64_t src0 = |
| _src[0][_i].f64; |
| const float64_t src1 = |
| _src[1][_i].f64; |
| |
| bool8_t dst = src0 != src1; |
| |
| _dst_val[_i].i8 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_fpow(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float src0 = |
| _mesa_half_to_float(_src[0][_i].u16); |
| const float src1 = |
| _mesa_half_to_float(_src[1][_i].u16); |
| |
| float16_t dst = bit_size == 64 ? powf(src0, src1) : pow(src0, src1); |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); |
| } else { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 16); |
| } |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| const float32_t src1 = |
| _src[1][_i].f32; |
| |
| float32_t dst = bit_size == 64 ? powf(src0, src1) : pow(src0, src1); |
| |
| _dst_val[_i].f32 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 32); |
| } |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float64_t src0 = |
| _src[0][_i].f64; |
| const float64_t src1 = |
| _src[1][_i].f64; |
| |
| float64_t dst = bit_size == 64 ? powf(src0, src1) : pow(src0, src1); |
| |
| _dst_val[_i].f64 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 64); |
| } |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
/* Constant-fold nir_op_fquantize2f16: quantize a float to half precision
 * and back.  Values below the smallest normal half (2^-14) are flushed to
 * a signed zero; everything else round-trips through a 16-bit half.
 *
 * NOTE(review): the 64-bit case reuses the single-precision helpers
 * (ldexpf/copysignf/_mesa_float_to_half), implicitly narrowing the double
 * source to float first — presumably intentional since the result is
 * half-quantized anyway, but confirm against the opcode definition.
 */
static void
evaluate_fquantize2f16(nir_const_value *_dst_val,
                 UNUSED unsigned num_components,
                 unsigned bit_size,
                 UNUSED nir_const_value **_src,
                 UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 16: {

      for (unsigned _i = 0; _i < num_components; _i++) {
            /* Half sources are widened to float for the arithmetic. */
            const float src0 =
               _mesa_half_to_float(_src[0][_i].u16);

            float16_t dst = (fabs(src0) < ldexpf(1.0, -14)) ? copysignf(0.0f, src0) : _mesa_half_to_float(_mesa_float_to_half(src0));

            /* Re-pack to half with the requested rounding mode. */
            if (nir_is_rounding_mode_rtz(execution_mode, 16)) {
               _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst);
            } else {
               _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst);
            }

      }

      break;
   }
   case 32: {

      for (unsigned _i = 0; _i < num_components; _i++) {
            const float32_t src0 =
               _src[0][_i].f32;

            float32_t dst = (fabs(src0) < ldexpf(1.0, -14)) ? copysignf(0.0f, src0) : _mesa_half_to_float(_mesa_float_to_half(src0));

            _dst_val[_i].f32 = dst;

      }

      break;
   }
   case 64: {

      for (unsigned _i = 0; _i < num_components; _i++) {
            const float64_t src0 =
               _src[0][_i].f64;

            float64_t dst = (fabs(src0) < ldexpf(1.0, -14)) ? copysignf(0.0f, src0) : _mesa_half_to_float(_mesa_float_to_half(src0));

            _dst_val[_i].f64 = dst;

      }

      break;
   }

   default:
      unreachable("unknown bit width");
   }
}
| static void |
| evaluate_frcp(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float src0 = |
| _mesa_half_to_float(_src[0][_i].u16); |
| |
| float16_t dst = bit_size == 64 ? 1.0 / src0 : 1.0f / src0; |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); |
| } else { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 16); |
| } |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| |
| float32_t dst = bit_size == 64 ? 1.0 / src0 : 1.0f / src0; |
| |
| _dst_val[_i].f32 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 32); |
| } |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float64_t src0 = |
| _src[0][_i].f64; |
| |
| float64_t dst = bit_size == 64 ? 1.0 / src0 : 1.0f / src0; |
| |
| _dst_val[_i].f64 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 64); |
| } |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_frem(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float src0 = |
| _mesa_half_to_float(_src[0][_i].u16); |
| const float src1 = |
| _mesa_half_to_float(_src[1][_i].u16); |
| |
| float16_t dst = src0 - src1 * truncf(src0 / src1); |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); |
| } else { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 16); |
| } |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| const float32_t src1 = |
| _src[1][_i].f32; |
| |
| float32_t dst = src0 - src1 * truncf(src0 / src1); |
| |
| _dst_val[_i].f32 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 32); |
| } |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float64_t src0 = |
| _src[0][_i].f64; |
| const float64_t src1 = |
| _src[1][_i].f64; |
| |
| float64_t dst = src0 - src1 * truncf(src0 / src1); |
| |
| _dst_val[_i].f64 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 64); |
| } |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
/* Constant-fold nir_op_frexp_exp: extract the binary exponent of the
 * source, discarding the significand.  The destination is always a
 * 32-bit integer regardless of the source float width.
 *
 * NOTE(review): frexp() takes `int *`; passing `int32_t *` assumes
 * int is exactly 32 bits — true on all platforms Mesa targets, but
 * strictly non-portable.
 */
static void
evaluate_frexp_exp(nir_const_value *_dst_val,
                 UNUSED unsigned num_components,
                 unsigned bit_size,
                 UNUSED nir_const_value **_src,
                 UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 16: {

      for (unsigned _i = 0; _i < num_components; _i++) {
            /* Half sources are widened to float first. */
            const float src0 =
               _mesa_half_to_float(_src[0][_i].u16);

            int32_t dst;

            /* Significand return value intentionally ignored. */
            frexp(src0, &dst);

            _dst_val[_i].i32 = dst;

      }

      break;
   }
   case 32: {

      for (unsigned _i = 0; _i < num_components; _i++) {
            const float32_t src0 =
               _src[0][_i].f32;

            int32_t dst;

            frexp(src0, &dst);

            _dst_val[_i].i32 = dst;

      }

      break;
   }
   case 64: {

      for (unsigned _i = 0; _i < num_components; _i++) {
            const float64_t src0 =
               _src[0][_i].f64;

            int32_t dst;

            frexp(src0, &dst);

            _dst_val[_i].i32 = dst;

      }

      break;
   }

   default:
      unreachable("unknown bit width");
   }
}
| static void |
| evaluate_frexp_sig(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float src0 = |
| _mesa_half_to_float(_src[0][_i].u16); |
| |
| float16_t dst; |
| |
| int n; dst = frexp(src0, &n); |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); |
| } else { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 16); |
| } |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| |
| float32_t dst; |
| |
| int n; dst = frexp(src0, &n); |
| |
| _dst_val[_i].f32 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 32); |
| } |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float64_t src0 = |
| _src[0][_i].f64; |
| |
| float64_t dst; |
| |
| int n; dst = frexp(src0, &n); |
| |
| _dst_val[_i].f64 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 64); |
| } |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_fround_even(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float src0 = |
| _mesa_half_to_float(_src[0][_i].u16); |
| |
| float16_t dst = bit_size == 64 ? _mesa_roundeven(src0) : _mesa_roundevenf(src0); |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); |
| } else { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 16); |
| } |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| |
| float32_t dst = bit_size == 64 ? _mesa_roundeven(src0) : _mesa_roundevenf(src0); |
| |
| _dst_val[_i].f32 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 32); |
| } |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float64_t src0 = |
| _src[0][_i].f64; |
| |
| float64_t dst = bit_size == 64 ? _mesa_roundeven(src0) : _mesa_roundevenf(src0); |
| |
| _dst_val[_i].f64 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 64); |
| } |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_frsq(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float src0 = |
| _mesa_half_to_float(_src[0][_i].u16); |
| |
| float16_t dst = bit_size == 64 ? 1.0 / sqrt(src0) : 1.0f / sqrtf(src0); |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); |
| } else { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 16); |
| } |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| |
| float32_t dst = bit_size == 64 ? 1.0 / sqrt(src0) : 1.0f / sqrtf(src0); |
| |
| _dst_val[_i].f32 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 32); |
| } |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float64_t src0 = |
| _src[0][_i].f64; |
| |
| float64_t dst = bit_size == 64 ? 1.0 / sqrt(src0) : 1.0f / sqrtf(src0); |
| |
| _dst_val[_i].f64 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 64); |
| } |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_fsat(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float src0 = |
| _mesa_half_to_float(_src[0][_i].u16); |
| |
| float16_t dst = fmin(fmax(src0, 0.0), 1.0); |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); |
| } else { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 16); |
| } |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| |
| float32_t dst = fmin(fmax(src0, 0.0), 1.0); |
| |
| _dst_val[_i].f32 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 32); |
| } |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float64_t src0 = |
| _src[0][_i].f64; |
| |
| float64_t dst = fmin(fmax(src0, 0.0), 1.0); |
| |
| _dst_val[_i].f64 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 64); |
| } |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_fsat_signed(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float src0 = |
| _mesa_half_to_float(_src[0][_i].u16); |
| |
| float16_t dst = fmin(fmax(src0, -1.0), 1.0); |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); |
| } else { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 16); |
| } |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| |
| float32_t dst = fmin(fmax(src0, -1.0), 1.0); |
| |
| _dst_val[_i].f32 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 32); |
| } |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float64_t src0 = |
| _src[0][_i].f64; |
| |
| float64_t dst = fmin(fmax(src0, -1.0), 1.0); |
| |
| _dst_val[_i].f64 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 64); |
| } |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_fsign(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float src0 = |
| _mesa_half_to_float(_src[0][_i].u16); |
| |
| float16_t dst = bit_size == 64 ? ((src0 == 0.0) ? 0.0 : ((src0 > 0.0) ? 1.0 : -1.0)) : ((src0 == 0.0f) ? 0.0f : ((src0 > 0.0f) ? 1.0f : -1.0f)); |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); |
| } else { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 16); |
| } |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| |
| float32_t dst = bit_size == 64 ? ((src0 == 0.0) ? 0.0 : ((src0 > 0.0) ? 1.0 : -1.0)) : ((src0 == 0.0f) ? 0.0f : ((src0 > 0.0f) ? 1.0f : -1.0f)); |
| |
| _dst_val[_i].f32 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 32); |
| } |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float64_t src0 = |
| _src[0][_i].f64; |
| |
| float64_t dst = bit_size == 64 ? ((src0 == 0.0) ? 0.0 : ((src0 > 0.0) ? 1.0 : -1.0)) : ((src0 == 0.0f) ? 0.0f : ((src0 > 0.0f) ? 1.0f : -1.0f)); |
| |
| _dst_val[_i].f64 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 64); |
| } |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_fsin(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float src0 = |
| _mesa_half_to_float(_src[0][_i].u16); |
| |
| float16_t dst = bit_size == 64 ? sin(src0) : sinf(src0); |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); |
| } else { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 16); |
| } |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| |
| float32_t dst = bit_size == 64 ? sin(src0) : sinf(src0); |
| |
| _dst_val[_i].f32 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 32); |
| } |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float64_t src0 = |
| _src[0][_i].f64; |
| |
| float64_t dst = bit_size == 64 ? sin(src0) : sinf(src0); |
| |
| _dst_val[_i].f64 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 64); |
| } |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_fsqrt(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float src0 = |
| _mesa_half_to_float(_src[0][_i].u16); |
| |
| float16_t dst = bit_size == 64 ? sqrt(src0) : sqrtf(src0); |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); |
| } else { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 16); |
| } |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| |
| float32_t dst = bit_size == 64 ? sqrt(src0) : sqrtf(src0); |
| |
| _dst_val[_i].f32 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 32); |
| } |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float64_t src0 = |
| _src[0][_i].f64; |
| |
| float64_t dst = bit_size == 64 ? sqrt(src0) : sqrtf(src0); |
| |
| _dst_val[_i].f64 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 64); |
| } |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
/* Constant-folds nir_op_fsub: per-component floating-point subtraction.
 *
 * When the execution mode requests round-toward-zero, the subtraction is
 * done via the softfloat helpers (_mesa_double_sub_rtz for 64-bit,
 * double-precision subtract narrowed with _mesa_double_to_float_rtz
 * otherwise); otherwise plain `src0 - src1` is used.  16-bit results are
 * additionally rounded back to half precision (RTZ or RTNE), and denorm
 * results are flushed to zero when the mode requests it.
 *
 * NOTE: inside the 16- and 32-bit cases the `bit_size == 64` branch is
 * dead — a template artifact, bit_size is constant per case. */
static void
evaluate_fsub(nir_const_value *_dst_val,
                 UNUSED unsigned num_components,
                  unsigned bit_size,
                 UNUSED nir_const_value **_src,
                 UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 16: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               const float src0 =
                     _mesa_half_to_float(_src[0][_i].u16);
               const float src1 =
                     _mesa_half_to_float(_src[1][_i].u16);

            float16_t dst;


            if (nir_is_rounding_mode_rtz(execution_mode, bit_size)) {
               if (bit_size == 64)
                  dst = _mesa_double_sub_rtz(src0, src1);
               else
                  dst = _mesa_double_to_float_rtz((double)src0 - (double)src1);
            } else {
               dst = src0 - src1;
            }


            if (nir_is_rounding_mode_rtz(execution_mode, 16)) {
               _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst);
            } else {
               _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst);
            }

            if (nir_is_denorm_flush_to_zero(execution_mode, 16)) {
            constant_denorm_flush_to_zero(&_dst_val[_i], 16);
            }
      }

      break;
   }
   case 32: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               const float32_t src0 =
                     _src[0][_i].f32;
               const float32_t src1 =
                     _src[1][_i].f32;

            float32_t dst;


            if (nir_is_rounding_mode_rtz(execution_mode, bit_size)) {
               if (bit_size == 64)
                  dst = _mesa_double_sub_rtz(src0, src1);
               else
                  dst = _mesa_double_to_float_rtz((double)src0 - (double)src1);
            } else {
               dst = src0 - src1;
            }


            _dst_val[_i].f32 = dst;

            if (nir_is_denorm_flush_to_zero(execution_mode, 32)) {
            constant_denorm_flush_to_zero(&_dst_val[_i], 32);
            }
      }

      break;
   }
   case 64: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               const float64_t src0 =
                     _src[0][_i].f64;
               const float64_t src1 =
                     _src[1][_i].f64;

            float64_t dst;


            if (nir_is_rounding_mode_rtz(execution_mode, bit_size)) {
               if (bit_size == 64)
                  dst = _mesa_double_sub_rtz(src0, src1);
               else
                  dst = _mesa_double_to_float_rtz((double)src0 - (double)src1);
            } else {
               dst = src0 - src1;
            }


            _dst_val[_i].f64 = dst;

            if (nir_is_denorm_flush_to_zero(execution_mode, 64)) {
            constant_denorm_flush_to_zero(&_dst_val[_i], 64);
            }
      }

      break;
   }

   default:
      unreachable("unknown bit width");
   }
}
/* Constant-folds nir_op_fsum2: horizontal sum of the first two components
 * of src0 (dst = src0.x + src0.y).  Only component 0 of the destination is
 * written; the unused lanes of the fixed-width vec struct are zero-filled.
 * 16-bit results are rounded back to half precision (RTZ or RTNE per
 * execution_mode); denorm results are flushed when the mode requests it. */
static void
evaluate_fsum2(nir_const_value *_dst_val,
                 UNUSED unsigned num_components,
                  unsigned bit_size,
                 UNUSED nir_const_value **_src,
                 UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 16: {




      const struct float16_vec src0 = {
            _mesa_half_to_float(_src[0][0].u16),
            _mesa_half_to_float(_src[0][1].u16),
         0,
         0,
         0,
         0,
         0,
         0,
         0,
         0,
         0,
         0,
         0,
         0,
         0,
         0,
      };

      struct float16_vec dst;

         dst.x = dst.y = dst.z = dst.w = ((src0.x) + (src0.y));

            if (nir_is_rounding_mode_rtz(execution_mode, 16)) {
               _dst_val[0].u16 = _mesa_float_to_float16_rtz(dst.x);
            } else {
               _dst_val[0].u16 = _mesa_float_to_float16_rtne(dst.x);
            }

            if (nir_is_denorm_flush_to_zero(execution_mode, 16)) {
            constant_denorm_flush_to_zero(&_dst_val[0], 16);
            }

      break;
   }
   case 32: {




      const struct float32_vec src0 = {
            _src[0][0].f32,
            _src[0][1].f32,
         0,
         0,
         0,
         0,
         0,
         0,
         0,
         0,
         0,
         0,
         0,
         0,
         0,
         0,
      };

      struct float32_vec dst;

         dst.x = dst.y = dst.z = dst.w = ((src0.x) + (src0.y));

            _dst_val[0].f32 = dst.x;

            if (nir_is_denorm_flush_to_zero(execution_mode, 32)) {
            constant_denorm_flush_to_zero(&_dst_val[0], 32);
            }

      break;
   }
   case 64: {




      const struct float64_vec src0 = {
            _src[0][0].f64,
            _src[0][1].f64,
         0,
         0,
         0,
         0,
         0,
         0,
         0,
         0,
         0,
         0,
         0,
         0,
         0,
         0,
      };

      struct float64_vec dst;

         dst.x = dst.y = dst.z = dst.w = ((src0.x) + (src0.y));

            _dst_val[0].f64 = dst.x;

            if (nir_is_denorm_flush_to_zero(execution_mode, 64)) {
            constant_denorm_flush_to_zero(&_dst_val[0], 64);
            }

      break;
   }

   default:
      unreachable("unknown bit width");
   }
}
/* Constant-folds nir_op_fsum3: horizontal sum of the first three
 * components of src0 (dst = src0.x + src0.y + src0.z).  Only component 0
 * of the destination is written; unused lanes of the vec struct are
 * zero-filled.  16-bit results are rounded back to half precision (RTZ or
 * RTNE per execution_mode); denorms are flushed when the mode requests. */
static void
evaluate_fsum3(nir_const_value *_dst_val,
                 UNUSED unsigned num_components,
                  unsigned bit_size,
                 UNUSED nir_const_value **_src,
                 UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 16: {




      const struct float16_vec src0 = {
            _mesa_half_to_float(_src[0][0].u16),
            _mesa_half_to_float(_src[0][1].u16),
            _mesa_half_to_float(_src[0][2].u16),
         0,
         0,
         0,
         0,
         0,
         0,
         0,
         0,
         0,
         0,
         0,
         0,
         0,
      };

      struct float16_vec dst;

         dst.x = dst.y = dst.z = dst.w = ((src0.x) + (src0.y) + (src0.z));

            if (nir_is_rounding_mode_rtz(execution_mode, 16)) {
               _dst_val[0].u16 = _mesa_float_to_float16_rtz(dst.x);
            } else {
               _dst_val[0].u16 = _mesa_float_to_float16_rtne(dst.x);
            }

            if (nir_is_denorm_flush_to_zero(execution_mode, 16)) {
            constant_denorm_flush_to_zero(&_dst_val[0], 16);
            }

      break;
   }
   case 32: {




      const struct float32_vec src0 = {
            _src[0][0].f32,
            _src[0][1].f32,
            _src[0][2].f32,
         0,
         0,
         0,
         0,
         0,
         0,
         0,
         0,
         0,
         0,
         0,
         0,
         0,
      };

      struct float32_vec dst;

         dst.x = dst.y = dst.z = dst.w = ((src0.x) + (src0.y) + (src0.z));

            _dst_val[0].f32 = dst.x;

            if (nir_is_denorm_flush_to_zero(execution_mode, 32)) {
            constant_denorm_flush_to_zero(&_dst_val[0], 32);
            }

      break;
   }
   case 64: {




      const struct float64_vec src0 = {
            _src[0][0].f64,
            _src[0][1].f64,
            _src[0][2].f64,
         0,
         0,
         0,
         0,
         0,
         0,
         0,
         0,
         0,
         0,
         0,
         0,
         0,
      };

      struct float64_vec dst;

         dst.x = dst.y = dst.z = dst.w = ((src0.x) + (src0.y) + (src0.z));

            _dst_val[0].f64 = dst.x;

            if (nir_is_denorm_flush_to_zero(execution_mode, 64)) {
            constant_denorm_flush_to_zero(&_dst_val[0], 64);
            }

      break;
   }

   default:
      unreachable("unknown bit width");
   }
}
/* Constant-folds nir_op_fsum4: horizontal sum of the first four components
 * of src0 (dst = src0.x + src0.y + src0.z + src0.w).  Only component 0 of
 * the destination is written; unused lanes of the vec struct are
 * zero-filled.  16-bit results are rounded back to half precision (RTZ or
 * RTNE per execution_mode); denorms are flushed when the mode requests. */
static void
evaluate_fsum4(nir_const_value *_dst_val,
                 UNUSED unsigned num_components,
                  unsigned bit_size,
                 UNUSED nir_const_value **_src,
                 UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 16: {




      const struct float16_vec src0 = {
            _mesa_half_to_float(_src[0][0].u16),
            _mesa_half_to_float(_src[0][1].u16),
            _mesa_half_to_float(_src[0][2].u16),
            _mesa_half_to_float(_src[0][3].u16),
         0,
         0,
         0,
         0,
         0,
         0,
         0,
         0,
         0,
         0,
         0,
         0,
      };

      struct float16_vec dst;

         dst.x = dst.y = dst.z = dst.w = ((src0.x) + (src0.y) + (src0.z) + (src0.w));

            if (nir_is_rounding_mode_rtz(execution_mode, 16)) {
               _dst_val[0].u16 = _mesa_float_to_float16_rtz(dst.x);
            } else {
               _dst_val[0].u16 = _mesa_float_to_float16_rtne(dst.x);
            }

            if (nir_is_denorm_flush_to_zero(execution_mode, 16)) {
            constant_denorm_flush_to_zero(&_dst_val[0], 16);
            }

      break;
   }
   case 32: {




      const struct float32_vec src0 = {
            _src[0][0].f32,
            _src[0][1].f32,
            _src[0][2].f32,
            _src[0][3].f32,
         0,
         0,
         0,
         0,
         0,
         0,
         0,
         0,
         0,
         0,
         0,
         0,
      };

      struct float32_vec dst;

         dst.x = dst.y = dst.z = dst.w = ((src0.x) + (src0.y) + (src0.z) + (src0.w));

            _dst_val[0].f32 = dst.x;

            if (nir_is_denorm_flush_to_zero(execution_mode, 32)) {
            constant_denorm_flush_to_zero(&_dst_val[0], 32);
            }

      break;
   }
   case 64: {




      const struct float64_vec src0 = {
            _src[0][0].f64,
            _src[0][1].f64,
            _src[0][2].f64,
            _src[0][3].f64,
         0,
         0,
         0,
         0,
         0,
         0,
         0,
         0,
         0,
         0,
         0,
         0,
      };

      struct float64_vec dst;

         dst.x = dst.y = dst.z = dst.w = ((src0.x) + (src0.y) + (src0.z) + (src0.w));

            _dst_val[0].f64 = dst.x;

            if (nir_is_denorm_flush_to_zero(execution_mode, 64)) {
            constant_denorm_flush_to_zero(&_dst_val[0], 64);
            }

      break;
   }

   default:
      unreachable("unknown bit width");
   }
}
/* Constant-folds nir_op_ftrunc: per-component round toward zero
 * (truncate the fractional part).  16-bit sources are widened to float,
 * truncated with truncf(), then narrowed back (RTZ or RTNE per
 * execution_mode); 32/64-bit results are stored directly.  Denorm results
 * are flushed to zero when the execution mode requests it.
 *
 * NOTE: `bit_size == 64 ? trunc : truncf` is a template artifact —
 * bit_size is constant inside each case, so one arm is dead. */
static void
evaluate_ftrunc(nir_const_value *_dst_val,
                 UNUSED unsigned num_components,
                  unsigned bit_size,
                 UNUSED nir_const_value **_src,
                 UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 16: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               const float src0 =
                     _mesa_half_to_float(_src[0][_i].u16);

            float16_t dst = bit_size == 64 ? trunc(src0) : truncf(src0);

            if (nir_is_rounding_mode_rtz(execution_mode, 16)) {
               _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst);
            } else {
               _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst);
            }

            if (nir_is_denorm_flush_to_zero(execution_mode, 16)) {
            constant_denorm_flush_to_zero(&_dst_val[_i], 16);
            }
      }

      break;
   }
   case 32: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               const float32_t src0 =
                     _src[0][_i].f32;

            float32_t dst = bit_size == 64 ? trunc(src0) : truncf(src0);

            _dst_val[_i].f32 = dst;

            if (nir_is_denorm_flush_to_zero(execution_mode, 32)) {
            constant_denorm_flush_to_zero(&_dst_val[_i], 32);
            }
      }

      break;
   }
   case 64: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               const float64_t src0 =
                     _src[0][_i].f64;

            float64_t dst = bit_size == 64 ? trunc(src0) : truncf(src0);

            _dst_val[_i].f64 = dst;

            if (nir_is_denorm_flush_to_zero(execution_mode, 64)) {
            constant_denorm_flush_to_zero(&_dst_val[_i], 64);
            }
      }

      break;
   }

   default:
      unreachable("unknown bit width");
   }
}
/* Constant-folds nir_op_i2b1: integer (any source width) to 1-bit boolean.
 * dst is true iff src0 != 0.  The destination uses NIR's 0/-1 boolean
 * convention, hence the `-(int)dst` store; 1-bit sources are first
 * expanded from the stored 0/1 bit to 0/-1. */
static void
evaluate_i2b1(nir_const_value *_dst_val,
                 UNUSED unsigned num_components,
                  unsigned bit_size,
                 UNUSED nir_const_value **_src,
                 UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 1: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               /* 1-bit integers use a 0/-1 convention */
               const int1_t src0 = -(int1_t)_src[0][_i].b;

            bool1_t dst = src0 != 0;

            _dst_val[_i].b = -(int)dst;

      }

      break;
   }
   case 8: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               const int8_t src0 =
                     _src[0][_i].i8;

            bool1_t dst = src0 != 0;

            _dst_val[_i].b = -(int)dst;

      }

      break;
   }
   case 16: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               const int16_t src0 =
                     _src[0][_i].i16;

            bool1_t dst = src0 != 0;

            _dst_val[_i].b = -(int)dst;

      }

      break;
   }
   case 32: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               const int32_t src0 =
                     _src[0][_i].i32;

            bool1_t dst = src0 != 0;

            _dst_val[_i].b = -(int)dst;

      }

      break;
   }
   case 64: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               const int64_t src0 =
                     _src[0][_i].i64;

            bool1_t dst = src0 != 0;

            _dst_val[_i].b = -(int)dst;

      }

      break;
   }

   default:
      unreachable("unknown bit width");
   }
}
/* Constant-folds nir_op_i2b16: integer (any source width) to 16-bit
 * boolean.  dst is true iff src0 != 0; stored as 0/-1 (all-ones for true)
 * in the i16 destination channel per NIR's boolean convention. */
static void
evaluate_i2b16(nir_const_value *_dst_val,
                 UNUSED unsigned num_components,
                  unsigned bit_size,
                 UNUSED nir_const_value **_src,
                 UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 1: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               /* 1-bit integers use a 0/-1 convention */
               const int1_t src0 = -(int1_t)_src[0][_i].b;

            bool16_t dst = src0 != 0;

            _dst_val[_i].i16 = -(int)dst;

      }

      break;
   }
   case 8: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               const int8_t src0 =
                     _src[0][_i].i8;

            bool16_t dst = src0 != 0;

            _dst_val[_i].i16 = -(int)dst;

      }

      break;
   }
   case 16: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               const int16_t src0 =
                     _src[0][_i].i16;

            bool16_t dst = src0 != 0;

            _dst_val[_i].i16 = -(int)dst;

      }

      break;
   }
   case 32: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               const int32_t src0 =
                     _src[0][_i].i32;

            bool16_t dst = src0 != 0;

            _dst_val[_i].i16 = -(int)dst;

      }

      break;
   }
   case 64: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               const int64_t src0 =
                     _src[0][_i].i64;

            bool16_t dst = src0 != 0;

            _dst_val[_i].i16 = -(int)dst;

      }

      break;
   }

   default:
      unreachable("unknown bit width");
   }
}
/* Constant-folds nir_op_i2b32: integer (any source width) to 32-bit
 * boolean.  dst is true iff src0 != 0; stored as 0/-1 (all-ones for true)
 * in the i32 destination channel per NIR's boolean convention. */
static void
evaluate_i2b32(nir_const_value *_dst_val,
                 UNUSED unsigned num_components,
                  unsigned bit_size,
                 UNUSED nir_const_value **_src,
                 UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 1: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               /* 1-bit integers use a 0/-1 convention */
               const int1_t src0 = -(int1_t)_src[0][_i].b;

            bool32_t dst = src0 != 0;

            _dst_val[_i].i32 = -(int)dst;

      }

      break;
   }
   case 8: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               const int8_t src0 =
                     _src[0][_i].i8;

            bool32_t dst = src0 != 0;

            _dst_val[_i].i32 = -(int)dst;

      }

      break;
   }
   case 16: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               const int16_t src0 =
                     _src[0][_i].i16;

            bool32_t dst = src0 != 0;

            _dst_val[_i].i32 = -(int)dst;

      }

      break;
   }
   case 32: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               const int32_t src0 =
                     _src[0][_i].i32;

            bool32_t dst = src0 != 0;

            _dst_val[_i].i32 = -(int)dst;

      }

      break;
   }
   case 64: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               const int64_t src0 =
                     _src[0][_i].i64;

            bool32_t dst = src0 != 0;

            _dst_val[_i].i32 = -(int)dst;

      }

      break;
   }

   default:
      unreachable("unknown bit width");
   }
}
/* Constant-folds nir_op_i2b8: integer (any source width) to 8-bit
 * boolean.  dst is true iff src0 != 0; stored as 0/-1 (all-ones for true)
 * in the i8 destination channel per NIR's boolean convention. */
static void
evaluate_i2b8(nir_const_value *_dst_val,
                 UNUSED unsigned num_components,
                  unsigned bit_size,
                 UNUSED nir_const_value **_src,
                 UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 1: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               /* 1-bit integers use a 0/-1 convention */
               const int1_t src0 = -(int1_t)_src[0][_i].b;

            bool8_t dst = src0 != 0;

            _dst_val[_i].i8 = -(int)dst;

      }

      break;
   }
   case 8: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               const int8_t src0 =
                     _src[0][_i].i8;

            bool8_t dst = src0 != 0;

            _dst_val[_i].i8 = -(int)dst;

      }

      break;
   }
   case 16: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               const int16_t src0 =
                     _src[0][_i].i16;

            bool8_t dst = src0 != 0;

            _dst_val[_i].i8 = -(int)dst;

      }

      break;
   }
   case 32: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               const int32_t src0 =
                     _src[0][_i].i32;

            bool8_t dst = src0 != 0;

            _dst_val[_i].i8 = -(int)dst;

      }

      break;
   }
   case 64: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               const int64_t src0 =
                     _src[0][_i].i64;

            bool8_t dst = src0 != 0;

            _dst_val[_i].i8 = -(int)dst;

      }

      break;
   }

   default:
      unreachable("unknown bit width");
   }
}
/* Constant-folds nir_op_i2f16: signed integer (any source width) to
 * 16-bit float.  The conversion happens via an implicit int -> float
 * assignment (float16_t is a float-typed stand-in here); the result is
 * then packed to half precision with RTZ or RTNE rounding depending on
 * execution_mode, and flushed to zero if the mode requests it.
 * 1-bit sources are first expanded to 0/-1. */
static void
evaluate_i2f16(nir_const_value *_dst_val,
                 UNUSED unsigned num_components,
                  unsigned bit_size,
                 UNUSED nir_const_value **_src,
                 UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 1: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               /* 1-bit integers use a 0/-1 convention */
               const int1_t src0 = -(int1_t)_src[0][_i].b;

            float16_t dst = src0;

            if (nir_is_rounding_mode_rtz(execution_mode, 16)) {
               _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst);
            } else {
               _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst);
            }

            if (nir_is_denorm_flush_to_zero(execution_mode, 16)) {
            constant_denorm_flush_to_zero(&_dst_val[_i], 16);
            }
      }

      break;
   }
   case 8: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               const int8_t src0 =
                     _src[0][_i].i8;

            float16_t dst = src0;

            if (nir_is_rounding_mode_rtz(execution_mode, 16)) {
               _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst);
            } else {
               _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst);
            }

            if (nir_is_denorm_flush_to_zero(execution_mode, 16)) {
            constant_denorm_flush_to_zero(&_dst_val[_i], 16);
            }
      }

      break;
   }
   case 16: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               const int16_t src0 =
                     _src[0][_i].i16;

            float16_t dst = src0;

            if (nir_is_rounding_mode_rtz(execution_mode, 16)) {
               _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst);
            } else {
               _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst);
            }

            if (nir_is_denorm_flush_to_zero(execution_mode, 16)) {
            constant_denorm_flush_to_zero(&_dst_val[_i], 16);
            }
      }

      break;
   }
   case 32: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               const int32_t src0 =
                     _src[0][_i].i32;

            float16_t dst = src0;

            if (nir_is_rounding_mode_rtz(execution_mode, 16)) {
               _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst);
            } else {
               _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst);
            }

            if (nir_is_denorm_flush_to_zero(execution_mode, 16)) {
            constant_denorm_flush_to_zero(&_dst_val[_i], 16);
            }
      }

      break;
   }
   case 64: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               const int64_t src0 =
                     _src[0][_i].i64;

            float16_t dst = src0;

            if (nir_is_rounding_mode_rtz(execution_mode, 16)) {
               _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst);
            } else {
               _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst);
            }

            if (nir_is_denorm_flush_to_zero(execution_mode, 16)) {
            constant_denorm_flush_to_zero(&_dst_val[_i], 16);
            }
      }

      break;
   }

   default:
      unreachable("unknown bit width");
   }
}
/* Constant-folds nir_op_i2f32: signed integer (any source width) to
 * 32-bit float via implicit conversion.  1-bit sources are first expanded
 * to 0/-1.  The result is flushed to +/-0 if the execution mode requests
 * 32-bit flush-to-zero. */
static void
evaluate_i2f32(nir_const_value *_dst_val,
                 UNUSED unsigned num_components,
                  unsigned bit_size,
                 UNUSED nir_const_value **_src,
                 UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 1: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               /* 1-bit integers use a 0/-1 convention */
               const int1_t src0 = -(int1_t)_src[0][_i].b;

            float32_t dst = src0;

            _dst_val[_i].f32 = dst;

            if (nir_is_denorm_flush_to_zero(execution_mode, 32)) {
            constant_denorm_flush_to_zero(&_dst_val[_i], 32);
            }
      }

      break;
   }
   case 8: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               const int8_t src0 =
                     _src[0][_i].i8;

            float32_t dst = src0;

            _dst_val[_i].f32 = dst;

            if (nir_is_denorm_flush_to_zero(execution_mode, 32)) {
            constant_denorm_flush_to_zero(&_dst_val[_i], 32);
            }
      }

      break;
   }
   case 16: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               const int16_t src0 =
                     _src[0][_i].i16;

            float32_t dst = src0;

            _dst_val[_i].f32 = dst;

            if (nir_is_denorm_flush_to_zero(execution_mode, 32)) {
            constant_denorm_flush_to_zero(&_dst_val[_i], 32);
            }
      }

      break;
   }
   case 32: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               const int32_t src0 =
                     _src[0][_i].i32;

            float32_t dst = src0;

            _dst_val[_i].f32 = dst;

            if (nir_is_denorm_flush_to_zero(execution_mode, 32)) {
            constant_denorm_flush_to_zero(&_dst_val[_i], 32);
            }
      }

      break;
   }
   case 64: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               const int64_t src0 =
                     _src[0][_i].i64;

            float32_t dst = src0;

            _dst_val[_i].f32 = dst;

            if (nir_is_denorm_flush_to_zero(execution_mode, 32)) {
            constant_denorm_flush_to_zero(&_dst_val[_i], 32);
            }
      }

      break;
   }

   default:
      unreachable("unknown bit width");
   }
}
/* Constant-folds nir_op_i2f64: signed integer (any source width) to
 * 64-bit float via implicit conversion.  1-bit sources are first expanded
 * to 0/-1.  The result is flushed to +/-0 if the execution mode requests
 * 64-bit flush-to-zero. */
static void
evaluate_i2f64(nir_const_value *_dst_val,
                 UNUSED unsigned num_components,
                  unsigned bit_size,
                 UNUSED nir_const_value **_src,
                 UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 1: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               /* 1-bit integers use a 0/-1 convention */
               const int1_t src0 = -(int1_t)_src[0][_i].b;

            float64_t dst = src0;

            _dst_val[_i].f64 = dst;

            if (nir_is_denorm_flush_to_zero(execution_mode, 64)) {
            constant_denorm_flush_to_zero(&_dst_val[_i], 64);
            }
      }

      break;
   }
   case 8: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               const int8_t src0 =
                     _src[0][_i].i8;

            float64_t dst = src0;

            _dst_val[_i].f64 = dst;

            if (nir_is_denorm_flush_to_zero(execution_mode, 64)) {
            constant_denorm_flush_to_zero(&_dst_val[_i], 64);
            }
      }

      break;
   }
   case 16: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               const int16_t src0 =
                     _src[0][_i].i16;

            float64_t dst = src0;

            _dst_val[_i].f64 = dst;

            if (nir_is_denorm_flush_to_zero(execution_mode, 64)) {
            constant_denorm_flush_to_zero(&_dst_val[_i], 64);
            }
      }

      break;
   }
   case 32: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               const int32_t src0 =
                     _src[0][_i].i32;

            float64_t dst = src0;

            _dst_val[_i].f64 = dst;

            if (nir_is_denorm_flush_to_zero(execution_mode, 64)) {
            constant_denorm_flush_to_zero(&_dst_val[_i], 64);
            }
      }

      break;
   }
   case 64: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               const int64_t src0 =
                     _src[0][_i].i64;

            float64_t dst = src0;

            _dst_val[_i].f64 = dst;

            if (nir_is_denorm_flush_to_zero(execution_mode, 64)) {
            constant_denorm_flush_to_zero(&_dst_val[_i], 64);
            }
      }

      break;
   }

   default:
      unreachable("unknown bit width");
   }
}
/* Constant-folds nir_op_i2fmp: 32-bit signed integer to 16-bit float
 * ("mp" presumably = mediump — TODO confirm against nir_opcodes).
 * Unlike the multi-width conversions, the source is always i32 and the
 * destination always half precision, packed with RTZ or RTNE rounding
 * per execution_mode and flushed to zero when the mode requests it. */
static void
evaluate_i2fmp(nir_const_value *_dst_val,
                 UNUSED unsigned num_components,
                 UNUSED unsigned bit_size,
                 UNUSED nir_const_value **_src,
                 UNUSED unsigned execution_mode)
{




      for (unsigned _i = 0; _i < num_components; _i++) {
               const int32_t src0 =
                     _src[0][_i].i32;

            float16_t dst = src0;

            if (nir_is_rounding_mode_rtz(execution_mode, 16)) {
               _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst);
            } else {
               _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst);
            }

            if (nir_is_denorm_flush_to_zero(execution_mode, 16)) {
            constant_denorm_flush_to_zero(&_dst_val[_i], 16);
            }
      }

}
/* Constant-folds nir_op_i2i1: signed integer (any source width) to 1-bit
 * integer.  The implicit conversion keeps only the low bit (`dst & 1`
 * on store); 1-bit sources are first expanded to 0/-1. */
static void
evaluate_i2i1(nir_const_value *_dst_val,
                 UNUSED unsigned num_components,
                  unsigned bit_size,
                 UNUSED nir_const_value **_src,
                 UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 1: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               /* 1-bit integers use a 0/-1 convention */
               const int1_t src0 = -(int1_t)_src[0][_i].b;

            int1_t dst = src0;

            /* 1-bit integers get truncated */
            _dst_val[_i].b = dst & 1;

      }

      break;
   }
   case 8: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               const int8_t src0 =
                     _src[0][_i].i8;

            int1_t dst = src0;

            /* 1-bit integers get truncated */
            _dst_val[_i].b = dst & 1;

      }

      break;
   }
   case 16: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               const int16_t src0 =
                     _src[0][_i].i16;

            int1_t dst = src0;

            /* 1-bit integers get truncated */
            _dst_val[_i].b = dst & 1;

      }

      break;
   }
   case 32: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               const int32_t src0 =
                     _src[0][_i].i32;

            int1_t dst = src0;

            /* 1-bit integers get truncated */
            _dst_val[_i].b = dst & 1;

      }

      break;
   }
   case 64: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               const int64_t src0 =
                     _src[0][_i].i64;

            int1_t dst = src0;

            /* 1-bit integers get truncated */
            _dst_val[_i].b = dst & 1;

      }

      break;
   }

   default:
      unreachable("unknown bit width");
   }
}
/* Constant-folds nir_op_i2i16: signed integer (any source width) to
 * 16-bit integer.  Narrower sources are sign-extended, wider sources are
 * truncated, both via the implicit int16_t conversion; 1-bit sources are
 * first expanded to 0/-1. */
static void
evaluate_i2i16(nir_const_value *_dst_val,
                 UNUSED unsigned num_components,
                  unsigned bit_size,
                 UNUSED nir_const_value **_src,
                 UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 1: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               /* 1-bit integers use a 0/-1 convention */
               const int1_t src0 = -(int1_t)_src[0][_i].b;

            int16_t dst = src0;

            _dst_val[_i].i16 = dst;

      }

      break;
   }
   case 8: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               const int8_t src0 =
                     _src[0][_i].i8;

            int16_t dst = src0;

            _dst_val[_i].i16 = dst;

      }

      break;
   }
   case 16: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               const int16_t src0 =
                     _src[0][_i].i16;

            int16_t dst = src0;

            _dst_val[_i].i16 = dst;

      }

      break;
   }
   case 32: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               const int32_t src0 =
                     _src[0][_i].i32;

            int16_t dst = src0;

            _dst_val[_i].i16 = dst;

      }

      break;
   }
   case 64: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               const int64_t src0 =
                     _src[0][_i].i64;

            int16_t dst = src0;

            _dst_val[_i].i16 = dst;

      }

      break;
   }

   default:
      unreachable("unknown bit width");
   }
}
/* Constant-folds nir_op_i2i32: signed integer (any source width) to
 * 32-bit integer.  Narrower sources are sign-extended, 64-bit sources are
 * truncated, both via the implicit int32_t conversion; 1-bit sources are
 * first expanded to 0/-1. */
static void
evaluate_i2i32(nir_const_value *_dst_val,
                 UNUSED unsigned num_components,
                  unsigned bit_size,
                 UNUSED nir_const_value **_src,
                 UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 1: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               /* 1-bit integers use a 0/-1 convention */
               const int1_t src0 = -(int1_t)_src[0][_i].b;

            int32_t dst = src0;

            _dst_val[_i].i32 = dst;

      }

      break;
   }
   case 8: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               const int8_t src0 =
                     _src[0][_i].i8;

            int32_t dst = src0;

            _dst_val[_i].i32 = dst;

      }

      break;
   }
   case 16: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               const int16_t src0 =
                     _src[0][_i].i16;

            int32_t dst = src0;

            _dst_val[_i].i32 = dst;

      }

      break;
   }
   case 32: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               const int32_t src0 =
                     _src[0][_i].i32;

            int32_t dst = src0;

            _dst_val[_i].i32 = dst;

      }

      break;
   }
   case 64: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               const int64_t src0 =
                     _src[0][_i].i64;

            int32_t dst = src0;

            _dst_val[_i].i32 = dst;

      }

      break;
   }

   default:
      unreachable("unknown bit width");
   }
}
/* Constant-folds nir_op_i2i64: signed integer (any source width) to
 * 64-bit integer via sign-extending implicit conversion; 1-bit sources
 * are first expanded to 0/-1. */
static void
evaluate_i2i64(nir_const_value *_dst_val,
                 UNUSED unsigned num_components,
                  unsigned bit_size,
                 UNUSED nir_const_value **_src,
                 UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 1: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               /* 1-bit integers use a 0/-1 convention */
               const int1_t src0 = -(int1_t)_src[0][_i].b;

            int64_t dst = src0;

            _dst_val[_i].i64 = dst;

      }

      break;
   }
   case 8: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               const int8_t src0 =
                     _src[0][_i].i8;

            int64_t dst = src0;

            _dst_val[_i].i64 = dst;

      }

      break;
   }
   case 16: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               const int16_t src0 =
                     _src[0][_i].i16;

            int64_t dst = src0;

            _dst_val[_i].i64 = dst;

      }

      break;
   }
   case 32: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               const int32_t src0 =
                     _src[0][_i].i32;

            int64_t dst = src0;

            _dst_val[_i].i64 = dst;

      }

      break;
   }
   case 64: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               const int64_t src0 =
                     _src[0][_i].i64;

            int64_t dst = src0;

            _dst_val[_i].i64 = dst;

      }

      break;
   }

   default:
      unreachable("unknown bit width");
   }
}
/* Constant-folds nir_op_i2i8: signed integer (any source width) to 8-bit
 * integer.  Wider sources are truncated via the implicit int8_t
 * conversion; 1-bit sources are first expanded to 0/-1. */
static void
evaluate_i2i8(nir_const_value *_dst_val,
                 UNUSED unsigned num_components,
                  unsigned bit_size,
                 UNUSED nir_const_value **_src,
                 UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 1: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               /* 1-bit integers use a 0/-1 convention */
               const int1_t src0 = -(int1_t)_src[0][_i].b;

            int8_t dst = src0;

            _dst_val[_i].i8 = dst;

      }

      break;
   }
   case 8: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               const int8_t src0 =
                     _src[0][_i].i8;

            int8_t dst = src0;

            _dst_val[_i].i8 = dst;

      }

      break;
   }
   case 16: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               const int16_t src0 =
                     _src[0][_i].i16;

            int8_t dst = src0;

            _dst_val[_i].i8 = dst;

      }

      break;
   }
   case 32: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               const int32_t src0 =
                     _src[0][_i].i32;

            int8_t dst = src0;

            _dst_val[_i].i8 = dst;

      }

      break;
   }
   case 64: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               const int64_t src0 =
                     _src[0][_i].i64;

            int8_t dst = src0;

            _dst_val[_i].i8 = dst;

      }

      break;
   }

   default:
      unreachable("unknown bit width");
   }
}
/* Constant-folds nir_op_i2imp: 32-bit signed integer to 16-bit integer
 * ("mp" presumably = mediump — TODO confirm against nir_opcodes).
 * Fixed 32 -> 16 truncating conversion; bit_size is unused. */
static void
evaluate_i2imp(nir_const_value *_dst_val,
                 UNUSED unsigned num_components,
                 UNUSED unsigned bit_size,
                 UNUSED nir_const_value **_src,
                 UNUSED unsigned execution_mode)
{




      for (unsigned _i = 0; _i < num_components; _i++) {
               const int32_t src0 =
                     _src[0][_i].i32;

            int16_t dst = src0;

            _dst_val[_i].i16 = dst;

      }

}
/* Constant-folds nir_op_iabs: per-component integer absolute value,
 * computed as (src0 < 0) ? -src0 : src0.  1-bit sources are expanded to
 * 0/-1 and the result truncated back to the low bit.
 *
 * NOTE(review): negating the most-negative value (e.g. INT64_MIN) is
 * signed overflow in C; presumably NIR defines iabs(INT_MIN) == INT_MIN
 * as on most hardware — confirm against the opcode definition. */
static void
evaluate_iabs(nir_const_value *_dst_val,
                 UNUSED unsigned num_components,
                  unsigned bit_size,
                 UNUSED nir_const_value **_src,
                 UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 1: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               /* 1-bit integers use a 0/-1 convention */
               const int1_t src0 = -(int1_t)_src[0][_i].b;

            int1_t dst = (src0 < 0) ? -src0 : src0;

            /* 1-bit integers get truncated */
            _dst_val[_i].b = dst & 1;

      }

      break;
   }
   case 8: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               const int8_t src0 =
                     _src[0][_i].i8;

            int8_t dst = (src0 < 0) ? -src0 : src0;

            _dst_val[_i].i8 = dst;

      }

      break;
   }
   case 16: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               const int16_t src0 =
                     _src[0][_i].i16;

            int16_t dst = (src0 < 0) ? -src0 : src0;

            _dst_val[_i].i16 = dst;

      }

      break;
   }
   case 32: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               const int32_t src0 =
                     _src[0][_i].i32;

            int32_t dst = (src0 < 0) ? -src0 : src0;

            _dst_val[_i].i32 = dst;

      }

      break;
   }
   case 64: {




      for (unsigned _i = 0; _i < num_components; _i++) {
               const int64_t src0 =
                     _src[0][_i].i64;

            int64_t dst = (src0 < 0) ? -src0 : src0;

            _dst_val[_i].i64 = dst;

      }

      break;
   }

   default:
      unreachable("unknown bit width");
   }
}
| static void |
| evaluate_iadd(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| /* 1-bit integers use a 0/-1 convention */ |
| const int1_t src0 = -(int1_t)_src[0][_i].b; |
| /* 1-bit integers use a 0/-1 convention */ |
| const int1_t src1 = -(int1_t)_src[1][_i].b; |
| |
| int1_t dst = src0 + src1; |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[_i].b = dst & 1; |
| |
| } |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int8_t src0 = |
| _src[0][_i].i8; |
| const int8_t src1 = |
| _src[1][_i].i8; |
| |
| int8_t dst = src0 + src1; |
| |
| _dst_val[_i].i8 = dst; |
| |
| } |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int16_t src0 = |
| _src[0][_i].i16; |
| const int16_t src1 = |
| _src[1][_i].i16; |
| |
| int16_t dst = src0 + src1; |
| |
| _dst_val[_i].i16 = dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int32_t src0 = |
| _src[0][_i].i32; |
| const int32_t src1 = |
| _src[1][_i].i32; |
| |
| int32_t dst = src0 + src1; |
| |
| _dst_val[_i].i32 = dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int64_t src0 = |
| _src[0][_i].i64; |
| const int64_t src1 = |
| _src[1][_i].i64; |
| |
| int64_t dst = src0 + src1; |
| |
| _dst_val[_i].i64 = dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_iadd_sat(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| /* 1-bit integers use a 0/-1 convention */ |
| const int1_t src0 = -(int1_t)_src[0][_i].b; |
| /* 1-bit integers use a 0/-1 convention */ |
| const int1_t src1 = -(int1_t)_src[1][_i].b; |
| |
| int1_t dst = |
| src1 > 0 ? |
| (src0 + src1 < src0 ? (1ull << (bit_size - 1)) - 1 : src0 + src1) : |
| (src0 < src0 + src1 ? (1ull << (bit_size - 1)) : src0 + src1) |
| ; |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[_i].b = dst & 1; |
| |
| } |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int8_t src0 = |
| _src[0][_i].i8; |
| const int8_t src1 = |
| _src[1][_i].i8; |
| |
| int8_t dst = |
| src1 > 0 ? |
| (src0 + src1 < src0 ? (1ull << (bit_size - 1)) - 1 : src0 + src1) : |
| (src0 < src0 + src1 ? (1ull << (bit_size - 1)) : src0 + src1) |
| ; |
| |
| _dst_val[_i].i8 = dst; |
| |
| } |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int16_t src0 = |
| _src[0][_i].i16; |
| const int16_t src1 = |
| _src[1][_i].i16; |
| |
| int16_t dst = |
| src1 > 0 ? |
| (src0 + src1 < src0 ? (1ull << (bit_size - 1)) - 1 : src0 + src1) : |
| (src0 < src0 + src1 ? (1ull << (bit_size - 1)) : src0 + src1) |
| ; |
| |
| _dst_val[_i].i16 = dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int32_t src0 = |
| _src[0][_i].i32; |
| const int32_t src1 = |
| _src[1][_i].i32; |
| |
| int32_t dst = |
| src1 > 0 ? |
| (src0 + src1 < src0 ? (1ull << (bit_size - 1)) - 1 : src0 + src1) : |
| (src0 < src0 + src1 ? (1ull << (bit_size - 1)) : src0 + src1) |
| ; |
| |
| _dst_val[_i].i32 = dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int64_t src0 = |
| _src[0][_i].i64; |
| const int64_t src1 = |
| _src[1][_i].i64; |
| |
| int64_t dst = |
| src1 > 0 ? |
| (src0 + src1 < src0 ? (1ull << (bit_size - 1)) - 1 : src0 + src1) : |
| (src0 < src0 + src1 ? (1ull << (bit_size - 1)) : src0 + src1) |
| ; |
| |
| _dst_val[_i].i64 = dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_iand(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint1_t src0 = |
| _src[0][_i].b; |
| const uint1_t src1 = |
| _src[1][_i].b; |
| |
| uint1_t dst = src0 & src1; |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[_i].b = dst & 1; |
| |
| } |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint8_t src0 = |
| _src[0][_i].u8; |
| const uint8_t src1 = |
| _src[1][_i].u8; |
| |
| uint8_t dst = src0 & src1; |
| |
| _dst_val[_i].u8 = dst; |
| |
| } |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint16_t src0 = |
| _src[0][_i].u16; |
| const uint16_t src1 = |
| _src[1][_i].u16; |
| |
| uint16_t dst = src0 & src1; |
| |
| _dst_val[_i].u16 = dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint32_t src0 = |
| _src[0][_i].u32; |
| const uint32_t src1 = |
| _src[1][_i].u32; |
| |
| uint32_t dst = src0 & src1; |
| |
| _dst_val[_i].u32 = dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint64_t src0 = |
| _src[0][_i].u64; |
| const uint64_t src1 = |
| _src[1][_i].u64; |
| |
| uint64_t dst = src0 & src1; |
| |
| _dst_val[_i].u64 = dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_ibfe(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| UNUSED unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int32_t src0 = |
| _src[0][_i].i32; |
| const uint32_t src1 = |
| _src[1][_i].u32; |
| const uint32_t src2 = |
| _src[2][_i].u32; |
| |
| int32_t dst; |
| |
| |
| int base = src0; |
| unsigned offset = src1 & 0x1F; |
| unsigned bits = src2 & 0x1F; |
| if (bits == 0) { |
| dst = 0; |
| } else if (offset + bits < 32) { |
| dst = (base << (32 - bits - offset)) >> (32 - bits); |
| } else { |
| dst = base >> offset; |
| } |
| |
| |
| _dst_val[_i].i32 = dst; |
| |
| } |
| |
| } |
| static void |
| evaluate_ibitfield_extract(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| UNUSED unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int32_t src0 = |
| _src[0][_i].i32; |
| const int32_t src1 = |
| _src[1][_i].i32; |
| const int32_t src2 = |
| _src[2][_i].i32; |
| |
| int32_t dst; |
| |
| |
| int base = src0; |
| int offset = src1, bits = src2; |
| if (bits == 0) { |
| dst = 0; |
| } else if (offset < 0 || bits < 0 || offset + bits > 32) { |
| dst = 0; |
| } else { |
| dst = (base << (32 - offset - bits)) >> offset; /* use sign-extending shift */ |
| } |
| |
| |
| _dst_val[_i].i32 = dst; |
| |
| } |
| |
| } |
| static void |
| evaluate_idiv(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| /* 1-bit integers use a 0/-1 convention */ |
| const int1_t src0 = -(int1_t)_src[0][_i].b; |
| /* 1-bit integers use a 0/-1 convention */ |
| const int1_t src1 = -(int1_t)_src[1][_i].b; |
| |
| int1_t dst = src1 == 0 ? 0 : (src0 / src1); |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[_i].b = dst & 1; |
| |
| } |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int8_t src0 = |
| _src[0][_i].i8; |
| const int8_t src1 = |
| _src[1][_i].i8; |
| |
| int8_t dst = src1 == 0 ? 0 : (src0 / src1); |
| |
| _dst_val[_i].i8 = dst; |
| |
| } |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int16_t src0 = |
| _src[0][_i].i16; |
| const int16_t src1 = |
| _src[1][_i].i16; |
| |
| int16_t dst = src1 == 0 ? 0 : (src0 / src1); |
| |
| _dst_val[_i].i16 = dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int32_t src0 = |
| _src[0][_i].i32; |
| const int32_t src1 = |
| _src[1][_i].i32; |
| |
| int32_t dst = src1 == 0 ? 0 : (src0 / src1); |
| |
| _dst_val[_i].i32 = dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int64_t src0 = |
| _src[0][_i].i64; |
| const int64_t src1 = |
| _src[1][_i].i64; |
| |
| int64_t dst = src1 == 0 ? 0 : (src0 / src1); |
| |
| _dst_val[_i].i64 = dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_ieq(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| /* 1-bit integers use a 0/-1 convention */ |
| const int1_t src0 = -(int1_t)_src[0][_i].b; |
| /* 1-bit integers use a 0/-1 convention */ |
| const int1_t src1 = -(int1_t)_src[1][_i].b; |
| |
| bool1_t dst = src0 == src1; |
| |
| _dst_val[_i].b = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int8_t src0 = |
| _src[0][_i].i8; |
| const int8_t src1 = |
| _src[1][_i].i8; |
| |
| bool1_t dst = src0 == src1; |
| |
| _dst_val[_i].b = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int16_t src0 = |
| _src[0][_i].i16; |
| const int16_t src1 = |
| _src[1][_i].i16; |
| |
| bool1_t dst = src0 == src1; |
| |
| _dst_val[_i].b = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int32_t src0 = |
| _src[0][_i].i32; |
| const int32_t src1 = |
| _src[1][_i].i32; |
| |
| bool1_t dst = src0 == src1; |
| |
| _dst_val[_i].b = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int64_t src0 = |
| _src[0][_i].i64; |
| const int64_t src1 = |
| _src[1][_i].i64; |
| |
| bool1_t dst = src0 == src1; |
| |
| _dst_val[_i].b = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_ieq16(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| /* 1-bit integers use a 0/-1 convention */ |
| const int1_t src0 = -(int1_t)_src[0][_i].b; |
| /* 1-bit integers use a 0/-1 convention */ |
| const int1_t src1 = -(int1_t)_src[1][_i].b; |
| |
| bool16_t dst = src0 == src1; |
| |
| _dst_val[_i].i16 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int8_t src0 = |
| _src[0][_i].i8; |
| const int8_t src1 = |
| _src[1][_i].i8; |
| |
| bool16_t dst = src0 == src1; |
| |
| _dst_val[_i].i16 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int16_t src0 = |
| _src[0][_i].i16; |
| const int16_t src1 = |
| _src[1][_i].i16; |
| |
| bool16_t dst = src0 == src1; |
| |
| _dst_val[_i].i16 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int32_t src0 = |
| _src[0][_i].i32; |
| const int32_t src1 = |
| _src[1][_i].i32; |
| |
| bool16_t dst = src0 == src1; |
| |
| _dst_val[_i].i16 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int64_t src0 = |
| _src[0][_i].i64; |
| const int64_t src1 = |
| _src[1][_i].i64; |
| |
| bool16_t dst = src0 == src1; |
| |
| _dst_val[_i].i16 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_ieq32(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| /* 1-bit integers use a 0/-1 convention */ |
| const int1_t src0 = -(int1_t)_src[0][_i].b; |
| /* 1-bit integers use a 0/-1 convention */ |
| const int1_t src1 = -(int1_t)_src[1][_i].b; |
| |
| bool32_t dst = src0 == src1; |
| |
| _dst_val[_i].i32 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int8_t src0 = |
| _src[0][_i].i8; |
| const int8_t src1 = |
| _src[1][_i].i8; |
| |
| bool32_t dst = src0 == src1; |
| |
| _dst_val[_i].i32 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int16_t src0 = |
| _src[0][_i].i16; |
| const int16_t src1 = |
| _src[1][_i].i16; |
| |
| bool32_t dst = src0 == src1; |
| |
| _dst_val[_i].i32 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int32_t src0 = |
| _src[0][_i].i32; |
| const int32_t src1 = |
| _src[1][_i].i32; |
| |
| bool32_t dst = src0 == src1; |
| |
| _dst_val[_i].i32 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int64_t src0 = |
| _src[0][_i].i64; |
| const int64_t src1 = |
| _src[1][_i].i64; |
| |
| bool32_t dst = src0 == src1; |
| |
| _dst_val[_i].i32 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_ieq8(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| /* 1-bit integers use a 0/-1 convention */ |
| const int1_t src0 = -(int1_t)_src[0][_i].b; |
| /* 1-bit integers use a 0/-1 convention */ |
| const int1_t src1 = -(int1_t)_src[1][_i].b; |
| |
| bool8_t dst = src0 == src1; |
| |
| _dst_val[_i].i8 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int8_t src0 = |
| _src[0][_i].i8; |
| const int8_t src1 = |
| _src[1][_i].i8; |
| |
| bool8_t dst = src0 == src1; |
| |
| _dst_val[_i].i8 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int16_t src0 = |
| _src[0][_i].i16; |
| const int16_t src1 = |
| _src[1][_i].i16; |
| |
| bool8_t dst = src0 == src1; |
| |
| _dst_val[_i].i8 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int32_t src0 = |
| _src[0][_i].i32; |
| const int32_t src1 = |
| _src[1][_i].i32; |
| |
| bool8_t dst = src0 == src1; |
| |
| _dst_val[_i].i8 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int64_t src0 = |
| _src[0][_i].i64; |
| const int64_t src1 = |
| _src[1][_i].i64; |
| |
| bool8_t dst = src0 == src1; |
| |
| _dst_val[_i].i8 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_ifind_msb(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| UNUSED unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int32_t src0 = |
| _src[0][_i].i32; |
| |
| int32_t dst; |
| |
| |
| dst = -1; |
| for (int bit = 31; bit >= 0; bit--) { |
| /* If src0 < 0, we're looking for the first 0 bit. |
| * if src0 >= 0, we're looking for the first 1 bit. |
| */ |
| if ((((src0 >> bit) & 1) && (src0 >= 0)) || |
| (!((src0 >> bit) & 1) && (src0 < 0))) { |
| dst = bit; |
| break; |
| } |
| } |
| |
| |
| _dst_val[_i].i32 = dst; |
| |
| } |
| |
| } |
| static void |
| evaluate_ige(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| /* 1-bit integers use a 0/-1 convention */ |
| const int1_t src0 = -(int1_t)_src[0][_i].b; |
| /* 1-bit integers use a 0/-1 convention */ |
| const int1_t src1 = -(int1_t)_src[1][_i].b; |
| |
| bool1_t dst = src0 >= src1; |
| |
| _dst_val[_i].b = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int8_t src0 = |
| _src[0][_i].i8; |
| const int8_t src1 = |
| _src[1][_i].i8; |
| |
| bool1_t dst = src0 >= src1; |
| |
| _dst_val[_i].b = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int16_t src0 = |
| _src[0][_i].i16; |
| const int16_t src1 = |
| _src[1][_i].i16; |
| |
| bool1_t dst = src0 >= src1; |
| |
| _dst_val[_i].b = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int32_t src0 = |
| _src[0][_i].i32; |
| const int32_t src1 = |
| _src[1][_i].i32; |
| |
| bool1_t dst = src0 >= src1; |
| |
| _dst_val[_i].b = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int64_t src0 = |
| _src[0][_i].i64; |
| const int64_t src1 = |
| _src[1][_i].i64; |
| |
| bool1_t dst = src0 >= src1; |
| |
| _dst_val[_i].b = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_ige16(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| /* 1-bit integers use a 0/-1 convention */ |
| const int1_t src0 = -(int1_t)_src[0][_i].b; |
| /* 1-bit integers use a 0/-1 convention */ |
| const int1_t src1 = -(int1_t)_src[1][_i].b; |
| |
| bool16_t dst = src0 >= src1; |
| |
| _dst_val[_i].i16 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int8_t src0 = |
| _src[0][_i].i8; |
| const int8_t src1 = |
| _src[1][_i].i8; |
| |
| bool16_t dst = src0 >= src1; |
| |
| _dst_val[_i].i16 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int16_t src0 = |
| _src[0][_i].i16; |
| const int16_t src1 = |
| _src[1][_i].i16; |
| |
| bool16_t dst = src0 >= src1; |
| |
| _dst_val[_i].i16 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int32_t src0 = |
| _src[0][_i].i32; |
| const int32_t src1 = |
| _src[1][_i].i32; |
| |
| bool16_t dst = src0 >= src1; |
| |
| _dst_val[_i].i16 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int64_t src0 = |
| _src[0][_i].i64; |
| const int64_t src1 = |
| _src[1][_i].i64; |
| |
| bool16_t dst = src0 >= src1; |
| |
| _dst_val[_i].i16 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_ige32(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| /* 1-bit integers use a 0/-1 convention */ |
| const int1_t src0 = -(int1_t)_src[0][_i].b; |
| /* 1-bit integers use a 0/-1 convention */ |
| const int1_t src1 = -(int1_t)_src[1][_i].b; |
| |
| bool32_t dst = src0 >= src1; |
| |
| _dst_val[_i].i32 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int8_t src0 = |
| _src[0][_i].i8; |
| const int8_t src1 = |
| _src[1][_i].i8; |
| |
| bool32_t dst = src0 >= src1; |
| |
| _dst_val[_i].i32 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int16_t src0 = |
| _src[0][_i].i16; |
| const int16_t src1 = |
| _src[1][_i].i16; |
| |
| bool32_t dst = src0 >= src1; |
| |
| _dst_val[_i].i32 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int32_t src0 = |
| _src[0][_i].i32; |
| const int32_t src1 = |
| _src[1][_i].i32; |
| |
| bool32_t dst = src0 >= src1; |
| |
| _dst_val[_i].i32 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int64_t src0 = |
| _src[0][_i].i64; |
| const int64_t src1 = |
| _src[1][_i].i64; |
| |
| bool32_t dst = src0 >= src1; |
| |
| _dst_val[_i].i32 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_ige8(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| /* 1-bit integers use a 0/-1 convention */ |
| const int1_t src0 = -(int1_t)_src[0][_i].b; |
| /* 1-bit integers use a 0/-1 convention */ |
| const int1_t src1 = -(int1_t)_src[1][_i].b; |
| |
| bool8_t dst = src0 >= src1; |
| |
| _dst_val[_i].i8 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int8_t src0 = |
| _src[0][_i].i8; |
| const int8_t src1 = |
| _src[1][_i].i8; |
| |
| bool8_t dst = src0 >= src1; |
| |
| _dst_val[_i].i8 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int16_t src0 = |
| _src[0][_i].i16; |
| const int16_t src1 = |
| _src[1][_i].i16; |
| |
| bool8_t dst = src0 >= src1; |
| |
| _dst_val[_i].i8 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int32_t src0 = |
| _src[0][_i].i32; |
| const int32_t src1 = |
| _src[1][_i].i32; |
| |
| bool8_t dst = src0 >= src1; |
| |
| _dst_val[_i].i8 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int64_t src0 = |
| _src[0][_i].i64; |
| const int64_t src1 = |
| _src[1][_i].i64; |
| |
| bool8_t dst = src0 >= src1; |
| |
| _dst_val[_i].i8 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_ihadd(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| /* 1-bit integers use a 0/-1 convention */ |
| const int1_t src0 = -(int1_t)_src[0][_i].b; |
| /* 1-bit integers use a 0/-1 convention */ |
| const int1_t src1 = -(int1_t)_src[1][_i].b; |
| |
| int1_t dst = (src0 & src1) + ((src0 ^ src1) >> 1); |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[_i].b = dst & 1; |
| |
| } |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int8_t src0 = |
| _src[0][_i].i8; |
| const int8_t src1 = |
| _src[1][_i].i8; |
| |
| int8_t dst = (src0 & src1) + ((src0 ^ src1) >> 1); |
| |
| _dst_val[_i].i8 = dst; |
| |
| } |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int16_t src0 = |
| _src[0][_i].i16; |
| const int16_t src1 = |
| _src[1][_i].i16; |
| |
| int16_t dst = (src0 & src1) + ((src0 ^ src1) >> 1); |
| |
| _dst_val[_i].i16 = dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int32_t src0 = |
| _src[0][_i].i32; |
| const int32_t src1 = |
| _src[1][_i].i32; |
| |
| int32_t dst = (src0 & src1) + ((src0 ^ src1) >> 1); |
| |
| _dst_val[_i].i32 = dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int64_t src0 = |
| _src[0][_i].i64; |
| const int64_t src1 = |
| _src[1][_i].i64; |
| |
| int64_t dst = (src0 & src1) + ((src0 ^ src1) >> 1); |
| |
| _dst_val[_i].i64 = dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_ilt(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| /* 1-bit integers use a 0/-1 convention */ |
| const int1_t src0 = -(int1_t)_src[0][_i].b; |
| /* 1-bit integers use a 0/-1 convention */ |
| const int1_t src1 = -(int1_t)_src[1][_i].b; |
| |
| bool1_t dst = src0 < src1; |
| |
| _dst_val[_i].b = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int8_t src0 = |
| _src[0][_i].i8; |
| const int8_t src1 = |
| _src[1][_i].i8; |
| |
| bool1_t dst = src0 < src1; |
| |
| _dst_val[_i].b = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int16_t src0 = |
| _src[0][_i].i16; |
| const int16_t src1 = |
| _src[1][_i].i16; |
| |
| bool1_t dst = src0 < src1; |
| |
| _dst_val[_i].b = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int32_t src0 = |
| _src[0][_i].i32; |
| const int32_t src1 = |
| _src[1][_i].i32; |
| |
| bool1_t dst = src0 < src1; |
| |
| _dst_val[_i].b = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int64_t src0 = |
| _src[0][_i].i64; |
| const int64_t src1 = |
| _src[1][_i].i64; |
| |
| bool1_t dst = src0 < src1; |
| |
| _dst_val[_i].b = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_ilt16(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| /* 1-bit integers use a 0/-1 convention */ |
| const int1_t src0 = -(int1_t)_src[0][_i].b; |
| /* 1-bit integers use a 0/-1 convention */ |
| const int1_t src1 = -(int1_t)_src[1][_i].b; |
| |
| bool16_t dst = src0 < src1; |
| |
| _dst_val[_i].i16 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int8_t src0 = |
| _src[0][_i].i8; |
| const int8_t src1 = |
| _src[1][_i].i8; |
| |
| bool16_t dst = src0 < src1; |
| |
| _dst_val[_i].i16 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int16_t src0 = |
| _src[0][_i].i16; |
| const int16_t src1 = |
| _src[1][_i].i16; |
| |
| bool16_t dst = src0 < src1; |
| |
| _dst_val[_i].i16 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int32_t src0 = |
| _src[0][_i].i32; |
| const int32_t src1 = |
| _src[1][_i].i32; |
| |
| bool16_t dst = src0 < src1; |
| |
| _dst_val[_i].i16 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int64_t src0 = |
| _src[0][_i].i64; |
| const int64_t src1 = |
| _src[1][_i].i64; |
| |
| bool16_t dst = src0 < src1; |
| |
| _dst_val[_i].i16 = -(int)dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
/* Constant-folds nir_op_ilt32: per-component signed integer "less than".
 * Sources are signed integers of width `bit_size`; the destination is a
 * 32-bit boolean written as 0 (false) or -1/~0 (true) into .i32.
 */
static void
evaluate_ilt32(nir_const_value *_dst_val,
                 UNUSED unsigned num_components,
                 unsigned bit_size,
                 UNUSED nir_const_value **_src,
                 UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 1: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         /* 1-bit integers use a 0/-1 convention */
         const int1_t src0 = -(int1_t)_src[0][_i].b;
         /* 1-bit integers use a 0/-1 convention */
         const int1_t src1 = -(int1_t)_src[1][_i].b;

         bool32_t dst = src0 < src1;

         /* Expand the C bool (0/1) to the NIR 0/-1 boolean convention. */
         _dst_val[_i].i32 = -(int)dst;

      }

      break;
   }
   case 8: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const int8_t src0 = _src[0][_i].i8;
         const int8_t src1 = _src[1][_i].i8;

         bool32_t dst = src0 < src1;

         _dst_val[_i].i32 = -(int)dst;

      }

      break;
   }
   case 16: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const int16_t src0 = _src[0][_i].i16;
         const int16_t src1 = _src[1][_i].i16;

         bool32_t dst = src0 < src1;

         _dst_val[_i].i32 = -(int)dst;

      }

      break;
   }
   case 32: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const int32_t src0 = _src[0][_i].i32;
         const int32_t src1 = _src[1][_i].i32;

         bool32_t dst = src0 < src1;

         _dst_val[_i].i32 = -(int)dst;

      }

      break;
   }
   case 64: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const int64_t src0 = _src[0][_i].i64;
         const int64_t src1 = _src[1][_i].i64;

         bool32_t dst = src0 < src1;

         _dst_val[_i].i32 = -(int)dst;

      }

      break;
   }

   default:
      unreachable("unknown bit width");
   }
}
/* Constant-folds nir_op_ilt8: per-component signed integer "less than".
 * Sources are signed integers of width `bit_size`; the destination is an
 * 8-bit boolean written as 0 (false) or -1/~0 (true) into .i8.
 */
static void
evaluate_ilt8(nir_const_value *_dst_val,
                 UNUSED unsigned num_components,
                 unsigned bit_size,
                 UNUSED nir_const_value **_src,
                 UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 1: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         /* 1-bit integers use a 0/-1 convention */
         const int1_t src0 = -(int1_t)_src[0][_i].b;
         /* 1-bit integers use a 0/-1 convention */
         const int1_t src1 = -(int1_t)_src[1][_i].b;

         bool8_t dst = src0 < src1;

         /* Expand the C bool (0/1) to the NIR 0/-1 boolean convention. */
         _dst_val[_i].i8 = -(int)dst;

      }

      break;
   }
   case 8: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const int8_t src0 = _src[0][_i].i8;
         const int8_t src1 = _src[1][_i].i8;

         bool8_t dst = src0 < src1;

         _dst_val[_i].i8 = -(int)dst;

      }

      break;
   }
   case 16: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const int16_t src0 = _src[0][_i].i16;
         const int16_t src1 = _src[1][_i].i16;

         bool8_t dst = src0 < src1;

         _dst_val[_i].i8 = -(int)dst;

      }

      break;
   }
   case 32: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const int32_t src0 = _src[0][_i].i32;
         const int32_t src1 = _src[1][_i].i32;

         bool8_t dst = src0 < src1;

         _dst_val[_i].i8 = -(int)dst;

      }

      break;
   }
   case 64: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const int64_t src0 = _src[0][_i].i64;
         const int64_t src1 = _src[1][_i].i64;

         bool8_t dst = src0 < src1;

         _dst_val[_i].i8 = -(int)dst;

      }

      break;
   }

   default:
      unreachable("unknown bit width");
   }
}
| static void |
| evaluate_imad24_ir3(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| UNUSED unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int32_t src0 = |
| _src[0][_i].i32; |
| const int32_t src1 = |
| _src[1][_i].i32; |
| const int32_t src2 = |
| _src[2][_i].i32; |
| |
| int32_t dst = (((int32_t)src0 << 8) >> 8) * (((int32_t)src1 << 8) >> 8) + src2; |
| |
| _dst_val[_i].i32 = dst; |
| |
| } |
| |
| } |
/* Constant-folds nir_op_imadsh_mix16 (ir3 backend): per component,
 * multiplies the high 16 bits of src0 by the low 16 bits of src1,
 * shifts the product left by 16, and adds src2.
 *
 * Note the masking constant 0xffff0000 is of unsigned type, so the
 * whole expression is evaluated in unsigned arithmetic with defined
 * wraparound; the final conversion back to int32_t is implementation-
 * defined for out-of-range values (two's-complement truncation on all
 * supported compilers).
 */
static void
evaluate_imadsh_mix16(nir_const_value *_dst_val,
                 UNUSED unsigned num_components,
                 UNUSED unsigned bit_size,
                 UNUSED nir_const_value **_src,
                 UNUSED unsigned execution_mode)
{
   for (unsigned _i = 0; _i < num_components; _i++) {
      const int32_t src0 = _src[0][_i].i32;
      const int32_t src1 = _src[1][_i].i32;
      const int32_t src2 = _src[2][_i].i32;

      int32_t dst;

      /* (hi16(src0) * lo16(src1)) << 16, plus the accumulator. */
      dst = ((((src0 & 0xffff0000) >> 16) * (src1 & 0x0000ffff)) << 16) + src2;


      _dst_val[_i].i32 = dst;

   }

}
/* Constant-folds nir_op_imax: per-component signed integer maximum.
 * Source and destination share the same signed type of width `bit_size`.
 */
static void
evaluate_imax(nir_const_value *_dst_val,
                 UNUSED unsigned num_components,
                 unsigned bit_size,
                 UNUSED nir_const_value **_src,
                 UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 1: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         /* 1-bit integers use a 0/-1 convention */
         const int1_t src0 = -(int1_t)_src[0][_i].b;
         /* 1-bit integers use a 0/-1 convention */
         const int1_t src1 = -(int1_t)_src[1][_i].b;

         int1_t dst = src1 > src0 ? src1 : src0;

         /* 1-bit integers get truncated */
         _dst_val[_i].b = dst & 1;

      }

      break;
   }
   case 8: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const int8_t src0 = _src[0][_i].i8;
         const int8_t src1 = _src[1][_i].i8;

         int8_t dst = src1 > src0 ? src1 : src0;

         _dst_val[_i].i8 = dst;

      }

      break;
   }
   case 16: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const int16_t src0 = _src[0][_i].i16;
         const int16_t src1 = _src[1][_i].i16;

         int16_t dst = src1 > src0 ? src1 : src0;

         _dst_val[_i].i16 = dst;

      }

      break;
   }
   case 32: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const int32_t src0 = _src[0][_i].i32;
         const int32_t src1 = _src[1][_i].i32;

         int32_t dst = src1 > src0 ? src1 : src0;

         _dst_val[_i].i32 = dst;

      }

      break;
   }
   case 64: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const int64_t src0 = _src[0][_i].i64;
         const int64_t src1 = _src[1][_i].i64;

         int64_t dst = src1 > src0 ? src1 : src0;

         _dst_val[_i].i64 = dst;

      }

      break;
   }

   default:
      unreachable("unknown bit width");
   }
}
/* Constant-folds nir_op_imin: per-component signed integer minimum.
 * Source and destination share the same signed type of width `bit_size`.
 */
static void
evaluate_imin(nir_const_value *_dst_val,
                 UNUSED unsigned num_components,
                 unsigned bit_size,
                 UNUSED nir_const_value **_src,
                 UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 1: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         /* 1-bit integers use a 0/-1 convention */
         const int1_t src0 = -(int1_t)_src[0][_i].b;
         /* 1-bit integers use a 0/-1 convention */
         const int1_t src1 = -(int1_t)_src[1][_i].b;

         int1_t dst = src1 > src0 ? src0 : src1;

         /* 1-bit integers get truncated */
         _dst_val[_i].b = dst & 1;

      }

      break;
   }
   case 8: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const int8_t src0 = _src[0][_i].i8;
         const int8_t src1 = _src[1][_i].i8;

         int8_t dst = src1 > src0 ? src0 : src1;

         _dst_val[_i].i8 = dst;

      }

      break;
   }
   case 16: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const int16_t src0 = _src[0][_i].i16;
         const int16_t src1 = _src[1][_i].i16;

         int16_t dst = src1 > src0 ? src0 : src1;

         _dst_val[_i].i16 = dst;

      }

      break;
   }
   case 32: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const int32_t src0 = _src[0][_i].i32;
         const int32_t src1 = _src[1][_i].i32;

         int32_t dst = src1 > src0 ? src0 : src1;

         _dst_val[_i].i32 = dst;

      }

      break;
   }
   case 64: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const int64_t src0 = _src[0][_i].i64;
         const int64_t src1 = _src[1][_i].i64;

         int64_t dst = src1 > src0 ? src0 : src1;

         _dst_val[_i].i64 = dst;

      }

      break;
   }

   default:
      unreachable("unknown bit width");
   }
}
| static void |
| evaluate_imod(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| /* 1-bit integers use a 0/-1 convention */ |
| const int1_t src0 = -(int1_t)_src[0][_i].b; |
| /* 1-bit integers use a 0/-1 convention */ |
| const int1_t src1 = -(int1_t)_src[1][_i].b; |
| |
| int1_t dst = src1 == 0 ? 0 : ((src0 % src1 == 0 || (src0 >= 0) == (src1 >= 0)) ? src0 % src1 : src0 % src1 + src1); |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[_i].b = dst & 1; |
| |
| } |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int8_t src0 = |
| _src[0][_i].i8; |
| const int8_t src1 = |
| _src[1][_i].i8; |
| |
| int8_t dst = src1 == 0 ? 0 : ((src0 % src1 == 0 || (src0 >= 0) == (src1 >= 0)) ? src0 % src1 : src0 % src1 + src1); |
| |
| _dst_val[_i].i8 = dst; |
| |
| } |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int16_t src0 = |
| _src[0][_i].i16; |
| const int16_t src1 = |
| _src[1][_i].i16; |
| |
| int16_t dst = src1 == 0 ? 0 : ((src0 % src1 == 0 || (src0 >= 0) == (src1 >= 0)) ? src0 % src1 : src0 % src1 + src1); |
| |
| _dst_val[_i].i16 = dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int32_t src0 = |
| _src[0][_i].i32; |
| const int32_t src1 = |
| _src[1][_i].i32; |
| |
| int32_t dst = src1 == 0 ? 0 : ((src0 % src1 == 0 || (src0 >= 0) == (src1 >= 0)) ? src0 % src1 : src0 % src1 + src1); |
| |
| _dst_val[_i].i32 = dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int64_t src0 = |
| _src[0][_i].i64; |
| const int64_t src1 = |
| _src[1][_i].i64; |
| |
| int64_t dst = src1 == 0 ? 0 : ((src0 % src1 == 0 || (src0 >= 0) == (src1 >= 0)) ? src0 % src1 : src0 % src1 + src1); |
| |
| _dst_val[_i].i64 = dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
/* Constant-folds nir_op_imul: per-component integer multiply, truncated
 * to `bit_size` bits.  The multiply is carried out in uint64_t so that
 * it never overflows signed arithmetic (which would be UB); the
 * narrowing store keeps only the low bits.  The conversion of an
 * out-of-range uint64_t product back to the signed destination type is
 * implementation-defined — two's-complement truncation on all supported
 * compilers.
 */
static void
evaluate_imul(nir_const_value *_dst_val,
                 UNUSED unsigned num_components,
                 unsigned bit_size,
                 UNUSED nir_const_value **_src,
                 UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 1: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         /* 1-bit integers use a 0/-1 convention */
         const int1_t src0 = -(int1_t)_src[0][_i].b;
         /* 1-bit integers use a 0/-1 convention */
         const int1_t src1 = -(int1_t)_src[1][_i].b;

         int1_t dst;


         /* Use 64-bit multiplies to prevent overflow of signed arithmetic */
         dst = (uint64_t)src0 * (uint64_t)src1;


         /* 1-bit integers get truncated */
         _dst_val[_i].b = dst & 1;

      }

      break;
   }
   case 8: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const int8_t src0 = _src[0][_i].i8;
         const int8_t src1 = _src[1][_i].i8;

         int8_t dst;


         /* Use 64-bit multiplies to prevent overflow of signed arithmetic */
         dst = (uint64_t)src0 * (uint64_t)src1;


         _dst_val[_i].i8 = dst;

      }

      break;
   }
   case 16: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const int16_t src0 = _src[0][_i].i16;
         const int16_t src1 = _src[1][_i].i16;

         int16_t dst;


         /* Use 64-bit multiplies to prevent overflow of signed arithmetic */
         dst = (uint64_t)src0 * (uint64_t)src1;


         _dst_val[_i].i16 = dst;

      }

      break;
   }
   case 32: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const int32_t src0 = _src[0][_i].i32;
         const int32_t src1 = _src[1][_i].i32;

         int32_t dst;


         /* Use 64-bit multiplies to prevent overflow of signed arithmetic */
         dst = (uint64_t)src0 * (uint64_t)src1;


         _dst_val[_i].i32 = dst;

      }

      break;
   }
   case 64: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const int64_t src0 = _src[0][_i].i64;
         const int64_t src1 = _src[1][_i].i64;

         int64_t dst;


         /* Use 64-bit multiplies to prevent overflow of signed arithmetic */
         dst = (uint64_t)src0 * (uint64_t)src1;


         _dst_val[_i].i64 = dst;

      }

      break;
   }

   default:
      unreachable("unknown bit width");
   }
}
| static void |
| evaluate_imul24(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| UNUSED unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int32_t src0 = |
| _src[0][_i].i32; |
| const int32_t src1 = |
| _src[1][_i].i32; |
| |
| int32_t dst = (((int32_t)src0 << 8) >> 8) * (((int32_t)src1 << 8) >> 8); |
| |
| _dst_val[_i].i32 = dst; |
| |
| } |
| |
| } |
| static void |
| evaluate_imul_2x32_64(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| UNUSED unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int32_t src0 = |
| _src[0][_i].i32; |
| const int32_t src1 = |
| _src[1][_i].i32; |
| |
| int64_t dst = (int64_t)src0 * (int64_t)src1; |
| |
| _dst_val[_i].i64 = dst; |
| |
| } |
| |
| } |
| static void |
| evaluate_imul_32x16(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| UNUSED unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int32_t src0 = |
| _src[0][_i].i32; |
| const int32_t src1 = |
| _src[1][_i].i32; |
| |
| int32_t dst = src0 * (int16_t) src1; |
| |
| _dst_val[_i].i32 = dst; |
| |
| } |
| |
| } |
/* Constant-folds nir_op_imul_high: per-component signed multiply that
 * returns the high `bit_size` bits of the double-width product.
 *
 * Generated code note: the `if (bit_size == 64)` test inside each case
 * is resolved by the surrounding switch — only the 64-bit case takes
 * the first branch, where the 64x64->128-bit product is computed on
 * 32-bit limbs with ubm_mul_u32arr() so sign extension is handled
 * without needing a native 128-bit type.  In all narrower cases that
 * branch is dead and the else path does the multiply in 64 bits.
 */
static void
evaluate_imul_high(nir_const_value *_dst_val,
                 UNUSED unsigned num_components,
                 unsigned bit_size,
                 UNUSED nir_const_value **_src,
                 UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 1: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         /* 1-bit integers use a 0/-1 convention */
         const int1_t src0 = -(int1_t)_src[0][_i].b;
         /* 1-bit integers use a 0/-1 convention */
         const int1_t src1 = -(int1_t)_src[1][_i].b;

         int1_t dst;


         if (bit_size == 64) {
            /* Dead in this case (bit_size == 1); see header comment. */
            /* We need to do a full 128-bit x 128-bit multiply in order for the sign
             * extension to work properly. The casts are kind-of annoying but needed
             * to prevent compiler warnings.
             */
            uint32_t src0_u32[4] = {
               src0,
               (int64_t)src0 >> 32,
               (int64_t)src0 >> 63,
               (int64_t)src0 >> 63,
            };
            uint32_t src1_u32[4] = {
               src1,
               (int64_t)src1 >> 32,
               (int64_t)src1 >> 63,
               (int64_t)src1 >> 63,
            };
            uint32_t prod_u32[4];
            ubm_mul_u32arr(prod_u32, src0_u32, src1_u32);
            dst = (uint64_t)prod_u32[2] | ((uint64_t)prod_u32[3] << 32);
         } else {
            /* First, sign-extend to 64-bit, then convert to unsigned to prevent
             * potential overflow of signed multiply */
            dst = ((uint64_t)(int64_t)src0 * (uint64_t)(int64_t)src1) >> bit_size;
         }


         /* 1-bit integers get truncated */
         _dst_val[_i].b = dst & 1;

      }

      break;
   }
   case 8: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const int8_t src0 = _src[0][_i].i8;
         const int8_t src1 = _src[1][_i].i8;

         int8_t dst;


         if (bit_size == 64) {
            /* Dead in this case (bit_size == 8); see header comment. */
            /* We need to do a full 128-bit x 128-bit multiply in order for the sign
             * extension to work properly. The casts are kind-of annoying but needed
             * to prevent compiler warnings.
             */
            uint32_t src0_u32[4] = {
               src0,
               (int64_t)src0 >> 32,
               (int64_t)src0 >> 63,
               (int64_t)src0 >> 63,
            };
            uint32_t src1_u32[4] = {
               src1,
               (int64_t)src1 >> 32,
               (int64_t)src1 >> 63,
               (int64_t)src1 >> 63,
            };
            uint32_t prod_u32[4];
            ubm_mul_u32arr(prod_u32, src0_u32, src1_u32);
            dst = (uint64_t)prod_u32[2] | ((uint64_t)prod_u32[3] << 32);
         } else {
            /* First, sign-extend to 64-bit, then convert to unsigned to prevent
             * potential overflow of signed multiply */
            dst = ((uint64_t)(int64_t)src0 * (uint64_t)(int64_t)src1) >> bit_size;
         }


         _dst_val[_i].i8 = dst;

      }

      break;
   }
   case 16: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const int16_t src0 = _src[0][_i].i16;
         const int16_t src1 = _src[1][_i].i16;

         int16_t dst;


         if (bit_size == 64) {
            /* Dead in this case (bit_size == 16); see header comment. */
            /* We need to do a full 128-bit x 128-bit multiply in order for the sign
             * extension to work properly. The casts are kind-of annoying but needed
             * to prevent compiler warnings.
             */
            uint32_t src0_u32[4] = {
               src0,
               (int64_t)src0 >> 32,
               (int64_t)src0 >> 63,
               (int64_t)src0 >> 63,
            };
            uint32_t src1_u32[4] = {
               src1,
               (int64_t)src1 >> 32,
               (int64_t)src1 >> 63,
               (int64_t)src1 >> 63,
            };
            uint32_t prod_u32[4];
            ubm_mul_u32arr(prod_u32, src0_u32, src1_u32);
            dst = (uint64_t)prod_u32[2] | ((uint64_t)prod_u32[3] << 32);
         } else {
            /* First, sign-extend to 64-bit, then convert to unsigned to prevent
             * potential overflow of signed multiply */
            dst = ((uint64_t)(int64_t)src0 * (uint64_t)(int64_t)src1) >> bit_size;
         }


         _dst_val[_i].i16 = dst;

      }

      break;
   }
   case 32: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const int32_t src0 = _src[0][_i].i32;
         const int32_t src1 = _src[1][_i].i32;

         int32_t dst;


         if (bit_size == 64) {
            /* Dead in this case (bit_size == 32); see header comment. */
            /* We need to do a full 128-bit x 128-bit multiply in order for the sign
             * extension to work properly. The casts are kind-of annoying but needed
             * to prevent compiler warnings.
             */
            uint32_t src0_u32[4] = {
               src0,
               (int64_t)src0 >> 32,
               (int64_t)src0 >> 63,
               (int64_t)src0 >> 63,
            };
            uint32_t src1_u32[4] = {
               src1,
               (int64_t)src1 >> 32,
               (int64_t)src1 >> 63,
               (int64_t)src1 >> 63,
            };
            uint32_t prod_u32[4];
            ubm_mul_u32arr(prod_u32, src0_u32, src1_u32);
            dst = (uint64_t)prod_u32[2] | ((uint64_t)prod_u32[3] << 32);
         } else {
            /* First, sign-extend to 64-bit, then convert to unsigned to prevent
             * potential overflow of signed multiply */
            dst = ((uint64_t)(int64_t)src0 * (uint64_t)(int64_t)src1) >> bit_size;
         }


         _dst_val[_i].i32 = dst;

      }

      break;
   }
   case 64: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const int64_t src0 = _src[0][_i].i64;
         const int64_t src1 = _src[1][_i].i64;

         int64_t dst;


         if (bit_size == 64) {
            /* Always taken in this case; the else branch (shift by 64)
             * is never evaluated here. */
            /* We need to do a full 128-bit x 128-bit multiply in order for the sign
             * extension to work properly. The casts are kind-of annoying but needed
             * to prevent compiler warnings.
             */
            uint32_t src0_u32[4] = {
               src0,
               (int64_t)src0 >> 32,
               (int64_t)src0 >> 63,
               (int64_t)src0 >> 63,
            };
            uint32_t src1_u32[4] = {
               src1,
               (int64_t)src1 >> 32,
               (int64_t)src1 >> 63,
               (int64_t)src1 >> 63,
            };
            uint32_t prod_u32[4];
            ubm_mul_u32arr(prod_u32, src0_u32, src1_u32);
            dst = (uint64_t)prod_u32[2] | ((uint64_t)prod_u32[3] << 32);
         } else {
            /* First, sign-extend to 64-bit, then convert to unsigned to prevent
             * potential overflow of signed multiply */
            dst = ((uint64_t)(int64_t)src0 * (uint64_t)(int64_t)src1) >> bit_size;
         }


         _dst_val[_i].i64 = dst;

      }

      break;
   }

   default:
      unreachable("unknown bit width");
   }
}
/* Constant-folds nir_op_ine: per-component integer "not equal".
 * Sources are integers of width `bit_size`; the destination is a 1-bit
 * boolean stored in .b as 0 (false) or -(int)1 (true).
 */
static void
evaluate_ine(nir_const_value *_dst_val,
                 UNUSED unsigned num_components,
                 unsigned bit_size,
                 UNUSED nir_const_value **_src,
                 UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 1: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         /* 1-bit integers use a 0/-1 convention */
         const int1_t src0 = -(int1_t)_src[0][_i].b;
         /* 1-bit integers use a 0/-1 convention */
         const int1_t src1 = -(int1_t)_src[1][_i].b;

         bool1_t dst = src0 != src1;

         /* Expand the C bool (0/1) to the NIR 0/-1 boolean convention. */
         _dst_val[_i].b = -(int)dst;

      }

      break;
   }
   case 8: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const int8_t src0 = _src[0][_i].i8;
         const int8_t src1 = _src[1][_i].i8;

         bool1_t dst = src0 != src1;

         _dst_val[_i].b = -(int)dst;

      }

      break;
   }
   case 16: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const int16_t src0 = _src[0][_i].i16;
         const int16_t src1 = _src[1][_i].i16;

         bool1_t dst = src0 != src1;

         _dst_val[_i].b = -(int)dst;

      }

      break;
   }
   case 32: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const int32_t src0 = _src[0][_i].i32;
         const int32_t src1 = _src[1][_i].i32;

         bool1_t dst = src0 != src1;

         _dst_val[_i].b = -(int)dst;

      }

      break;
   }
   case 64: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const int64_t src0 = _src[0][_i].i64;
         const int64_t src1 = _src[1][_i].i64;

         bool1_t dst = src0 != src1;

         _dst_val[_i].b = -(int)dst;

      }

      break;
   }

   default:
      unreachable("unknown bit width");
   }
}
/* Constant-folds nir_op_ine16: per-component integer "not equal" with a
 * 16-bit boolean destination — 0 (false) or -1/~0 (true) in .i16.
 */
static void
evaluate_ine16(nir_const_value *_dst_val,
                 UNUSED unsigned num_components,
                 unsigned bit_size,
                 UNUSED nir_const_value **_src,
                 UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 1: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         /* 1-bit integers use a 0/-1 convention */
         const int1_t src0 = -(int1_t)_src[0][_i].b;
         /* 1-bit integers use a 0/-1 convention */
         const int1_t src1 = -(int1_t)_src[1][_i].b;

         bool16_t dst = src0 != src1;

         /* Expand the C bool (0/1) to the NIR 0/-1 boolean convention. */
         _dst_val[_i].i16 = -(int)dst;

      }

      break;
   }
   case 8: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const int8_t src0 = _src[0][_i].i8;
         const int8_t src1 = _src[1][_i].i8;

         bool16_t dst = src0 != src1;

         _dst_val[_i].i16 = -(int)dst;

      }

      break;
   }
   case 16: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const int16_t src0 = _src[0][_i].i16;
         const int16_t src1 = _src[1][_i].i16;

         bool16_t dst = src0 != src1;

         _dst_val[_i].i16 = -(int)dst;

      }

      break;
   }
   case 32: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const int32_t src0 = _src[0][_i].i32;
         const int32_t src1 = _src[1][_i].i32;

         bool16_t dst = src0 != src1;

         _dst_val[_i].i16 = -(int)dst;

      }

      break;
   }
   case 64: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const int64_t src0 = _src[0][_i].i64;
         const int64_t src1 = _src[1][_i].i64;

         bool16_t dst = src0 != src1;

         _dst_val[_i].i16 = -(int)dst;

      }

      break;
   }

   default:
      unreachable("unknown bit width");
   }
}
/* Constant-folds nir_op_ine32: per-component integer "not equal" with a
 * 32-bit boolean destination — 0 (false) or -1/~0 (true) in .i32.
 */
static void
evaluate_ine32(nir_const_value *_dst_val,
                 UNUSED unsigned num_components,
                 unsigned bit_size,
                 UNUSED nir_const_value **_src,
                 UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 1: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         /* 1-bit integers use a 0/-1 convention */
         const int1_t src0 = -(int1_t)_src[0][_i].b;
         /* 1-bit integers use a 0/-1 convention */
         const int1_t src1 = -(int1_t)_src[1][_i].b;

         bool32_t dst = src0 != src1;

         /* Expand the C bool (0/1) to the NIR 0/-1 boolean convention. */
         _dst_val[_i].i32 = -(int)dst;

      }

      break;
   }
   case 8: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const int8_t src0 = _src[0][_i].i8;
         const int8_t src1 = _src[1][_i].i8;

         bool32_t dst = src0 != src1;

         _dst_val[_i].i32 = -(int)dst;

      }

      break;
   }
   case 16: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const int16_t src0 = _src[0][_i].i16;
         const int16_t src1 = _src[1][_i].i16;

         bool32_t dst = src0 != src1;

         _dst_val[_i].i32 = -(int)dst;

      }

      break;
   }
   case 32: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const int32_t src0 = _src[0][_i].i32;
         const int32_t src1 = _src[1][_i].i32;

         bool32_t dst = src0 != src1;

         _dst_val[_i].i32 = -(int)dst;

      }

      break;
   }
   case 64: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const int64_t src0 = _src[0][_i].i64;
         const int64_t src1 = _src[1][_i].i64;

         bool32_t dst = src0 != src1;

         _dst_val[_i].i32 = -(int)dst;

      }

      break;
   }

   default:
      unreachable("unknown bit width");
   }
}
/* Constant-folds nir_op_ine8: per-component integer "not equal" with an
 * 8-bit boolean destination — 0 (false) or -1/~0 (true) in .i8.
 */
static void
evaluate_ine8(nir_const_value *_dst_val,
                 UNUSED unsigned num_components,
                 unsigned bit_size,
                 UNUSED nir_const_value **_src,
                 UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 1: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         /* 1-bit integers use a 0/-1 convention */
         const int1_t src0 = -(int1_t)_src[0][_i].b;
         /* 1-bit integers use a 0/-1 convention */
         const int1_t src1 = -(int1_t)_src[1][_i].b;

         bool8_t dst = src0 != src1;

         /* Expand the C bool (0/1) to the NIR 0/-1 boolean convention. */
         _dst_val[_i].i8 = -(int)dst;

      }

      break;
   }
   case 8: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const int8_t src0 = _src[0][_i].i8;
         const int8_t src1 = _src[1][_i].i8;

         bool8_t dst = src0 != src1;

         _dst_val[_i].i8 = -(int)dst;

      }

      break;
   }
   case 16: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const int16_t src0 = _src[0][_i].i16;
         const int16_t src1 = _src[1][_i].i16;

         bool8_t dst = src0 != src1;

         _dst_val[_i].i8 = -(int)dst;

      }

      break;
   }
   case 32: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const int32_t src0 = _src[0][_i].i32;
         const int32_t src1 = _src[1][_i].i32;

         bool8_t dst = src0 != src1;

         _dst_val[_i].i8 = -(int)dst;

      }

      break;
   }
   case 64: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const int64_t src0 = _src[0][_i].i64;
         const int64_t src1 = _src[1][_i].i64;

         bool8_t dst = src0 != src1;

         _dst_val[_i].i8 = -(int)dst;

      }

      break;
   }

   default:
      unreachable("unknown bit width");
   }
}
/* Constant-folds nir_op_ineg: per-component two's-complement integer
 * negation.
 *
 * NOTE(review): `-src0` on the most negative value (e.g. INT32_MIN)
 * overflows signed arithmetic, which is undefined behavior in C; the
 * generated code relies on the compiler producing two's-complement
 * wraparound here — confirm against the code generator's assumptions.
 */
static void
evaluate_ineg(nir_const_value *_dst_val,
                 UNUSED unsigned num_components,
                 unsigned bit_size,
                 UNUSED nir_const_value **_src,
                 UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 1: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         /* 1-bit integers use a 0/-1 convention */
         const int1_t src0 = -(int1_t)_src[0][_i].b;

         int1_t dst = -src0;

         /* 1-bit integers get truncated */
         _dst_val[_i].b = dst & 1;

      }

      break;
   }
   case 8: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const int8_t src0 = _src[0][_i].i8;

         int8_t dst = -src0;

         _dst_val[_i].i8 = dst;

      }

      break;
   }
   case 16: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const int16_t src0 = _src[0][_i].i16;

         int16_t dst = -src0;

         _dst_val[_i].i16 = dst;

      }

      break;
   }
   case 32: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const int32_t src0 = _src[0][_i].i32;

         int32_t dst = -src0;

         _dst_val[_i].i32 = dst;

      }

      break;
   }
   case 64: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const int64_t src0 = _src[0][_i].i64;

         int64_t dst = -src0;

         _dst_val[_i].i64 = dst;

      }

      break;
   }

   default:
      unreachable("unknown bit width");
   }
}
/* Constant-folds nir_op_inot: per-component bitwise NOT.  For the 1-bit
 * case this doubles as logical NOT, since NIR booleans are 0/-1 and the
 * complement of each is the other.
 */
static void
evaluate_inot(nir_const_value *_dst_val,
                 UNUSED unsigned num_components,
                 unsigned bit_size,
                 UNUSED nir_const_value **_src,
                 UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 1: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         /* 1-bit integers use a 0/-1 convention */
         const int1_t src0 = -(int1_t)_src[0][_i].b;

         int1_t dst = ~src0;

         /* 1-bit integers get truncated */
         _dst_val[_i].b = dst & 1;

      }

      break;
   }
   case 8: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const int8_t src0 = _src[0][_i].i8;

         int8_t dst = ~src0;

         _dst_val[_i].i8 = dst;

      }

      break;
   }
   case 16: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const int16_t src0 = _src[0][_i].i16;

         int16_t dst = ~src0;

         _dst_val[_i].i16 = dst;

      }

      break;
   }
   case 32: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const int32_t src0 = _src[0][_i].i32;

         int32_t dst = ~src0;

         _dst_val[_i].i32 = dst;

      }

      break;
   }
   case 64: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const int64_t src0 = _src[0][_i].i64;

         int64_t dst = ~src0;

         _dst_val[_i].i64 = dst;

      }

      break;
   }

   default:
      unreachable("unknown bit width");
   }
}
/* Constant-folds nir_op_ior: per-component bitwise OR, computed in
 * unsigned arithmetic.  For the 1-bit case this is also logical OR of
 * NIR booleans.
 */
static void
evaluate_ior(nir_const_value *_dst_val,
                 UNUSED unsigned num_components,
                 unsigned bit_size,
                 UNUSED nir_const_value **_src,
                 UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 1: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const uint1_t src0 = _src[0][_i].b;
         const uint1_t src1 = _src[1][_i].b;

         uint1_t dst = src0 | src1;

         /* 1-bit integers get truncated */
         _dst_val[_i].b = dst & 1;

      }

      break;
   }
   case 8: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const uint8_t src0 = _src[0][_i].u8;
         const uint8_t src1 = _src[1][_i].u8;

         uint8_t dst = src0 | src1;

         _dst_val[_i].u8 = dst;

      }

      break;
   }
   case 16: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const uint16_t src0 = _src[0][_i].u16;
         const uint16_t src1 = _src[1][_i].u16;

         uint16_t dst = src0 | src1;

         _dst_val[_i].u16 = dst;

      }

      break;
   }
   case 32: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const uint32_t src0 = _src[0][_i].u32;
         const uint32_t src1 = _src[1][_i].u32;

         uint32_t dst = src0 | src1;

         _dst_val[_i].u32 = dst;

      }

      break;
   }
   case 64: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const uint64_t src0 = _src[0][_i].u64;
         const uint64_t src1 = _src[1][_i].u64;

         uint64_t dst = src0 | src1;

         _dst_val[_i].u64 = dst;

      }

      break;
   }

   default:
      unreachable("unknown bit width");
   }
}
| static void |
| evaluate_irem(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| /* 1-bit integers use a 0/-1 convention */ |
| const int1_t src0 = -(int1_t)_src[0][_i].b; |
| /* 1-bit integers use a 0/-1 convention */ |
| const int1_t src1 = -(int1_t)_src[1][_i].b; |
| |
| int1_t dst = src1 == 0 ? 0 : src0 % src1; |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[_i].b = dst & 1; |
| |
| } |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int8_t src0 = |
| _src[0][_i].i8; |
| const int8_t src1 = |
| _src[1][_i].i8; |
| |
| int8_t dst = src1 == 0 ? 0 : src0 % src1; |
| |
| _dst_val[_i].i8 = dst; |
| |
| } |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int16_t src0 = |
| _src[0][_i].i16; |
| const int16_t src1 = |
| _src[1][_i].i16; |
| |
| int16_t dst = src1 == 0 ? 0 : src0 % src1; |
| |
| _dst_val[_i].i16 = dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int32_t src0 = |
| _src[0][_i].i32; |
| const int32_t src1 = |
| _src[1][_i].i32; |
| |
| int32_t dst = src1 == 0 ? 0 : src0 % src1; |
| |
| _dst_val[_i].i32 = dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int64_t src0 = |
| _src[0][_i].i64; |
| const int64_t src1 = |
| _src[1][_i].i64; |
| |
| int64_t dst = src1 == 0 ? 0 : src0 % src1; |
| |
| _dst_val[_i].i64 = dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_irhadd(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| /* 1-bit integers use a 0/-1 convention */ |
| const int1_t src0 = -(int1_t)_src[0][_i].b; |
| /* 1-bit integers use a 0/-1 convention */ |
| const int1_t src1 = -(int1_t)_src[1][_i].b; |
| |
| int1_t dst = (src0 | src1) + ((src0 ^ src1) >> 1); |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[_i].b = dst & 1; |
| |
| } |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int8_t src0 = |
| _src[0][_i].i8; |
| const int8_t src1 = |
| _src[1][_i].i8; |
| |
| int8_t dst = (src0 | src1) + ((src0 ^ src1) >> 1); |
| |
| _dst_val[_i].i8 = dst; |
| |
| } |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int16_t src0 = |
| _src[0][_i].i16; |
| const int16_t src1 = |
| _src[1][_i].i16; |
| |
| int16_t dst = (src0 | src1) + ((src0 ^ src1) >> 1); |
| |
| _dst_val[_i].i16 = dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int32_t src0 = |
| _src[0][_i].i32; |
| const int32_t src1 = |
| _src[1][_i].i32; |
| |
| int32_t dst = (src0 | src1) + ((src0 ^ src1) >> 1); |
| |
| _dst_val[_i].i32 = dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int64_t src0 = |
| _src[0][_i].i64; |
| const int64_t src1 = |
| _src[1][_i].i64; |
| |
| int64_t dst = (src0 | src1) + ((src0 ^ src1) >> 1); |
| |
| _dst_val[_i].i64 = dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_ishl(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| /* 1-bit integers use a 0/-1 convention */ |
| const int1_t src0 = -(int1_t)_src[0][_i].b; |
| const uint32_t src1 = |
| _src[1][_i].u32; |
| |
| int1_t dst = (uint64_t)src0 << (src1 & (sizeof(src0) * 8 - 1)); |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[_i].b = dst & 1; |
| |
| } |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int8_t src0 = |
| _src[0][_i].i8; |
| const uint32_t src1 = |
| _src[1][_i].u32; |
| |
| int8_t dst = (uint64_t)src0 << (src1 & (sizeof(src0) * 8 - 1)); |
| |
| _dst_val[_i].i8 = dst; |
| |
| } |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int16_t src0 = |
| _src[0][_i].i16; |
| const uint32_t src1 = |
| _src[1][_i].u32; |
| |
| int16_t dst = (uint64_t)src0 << (src1 & (sizeof(src0) * 8 - 1)); |
| |
| _dst_val[_i].i16 = dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int32_t src0 = |
| _src[0][_i].i32; |
| const uint32_t src1 = |
| _src[1][_i].u32; |
| |
| int32_t dst = (uint64_t)src0 << (src1 & (sizeof(src0) * 8 - 1)); |
| |
| _dst_val[_i].i32 = dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int64_t src0 = |
| _src[0][_i].i64; |
| const uint32_t src1 = |
| _src[1][_i].u32; |
| |
| int64_t dst = (uint64_t)src0 << (src1 & (sizeof(src0) * 8 - 1)); |
| |
| _dst_val[_i].i64 = dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_ishr(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| /* 1-bit integers use a 0/-1 convention */ |
| const int1_t src0 = -(int1_t)_src[0][_i].b; |
| const uint32_t src1 = |
| _src[1][_i].u32; |
| |
| int1_t dst = src0 >> (src1 & (sizeof(src0) * 8 - 1)); |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[_i].b = dst & 1; |
| |
| } |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int8_t src0 = |
| _src[0][_i].i8; |
| const uint32_t src1 = |
| _src[1][_i].u32; |
| |
| int8_t dst = src0 >> (src1 & (sizeof(src0) * 8 - 1)); |
| |
| _dst_val[_i].i8 = dst; |
| |
| } |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int16_t src0 = |
| _src[0][_i].i16; |
| const uint32_t src1 = |
| _src[1][_i].u32; |
| |
| int16_t dst = src0 >> (src1 & (sizeof(src0) * 8 - 1)); |
| |
| _dst_val[_i].i16 = dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int32_t src0 = |
| _src[0][_i].i32; |
| const uint32_t src1 = |
| _src[1][_i].u32; |
| |
| int32_t dst = src0 >> (src1 & (sizeof(src0) * 8 - 1)); |
| |
| _dst_val[_i].i32 = dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int64_t src0 = |
| _src[0][_i].i64; |
| const uint32_t src1 = |
| _src[1][_i].u32; |
| |
| int64_t dst = src0 >> (src1 & (sizeof(src0) * 8 - 1)); |
| |
| _dst_val[_i].i64 = dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_isign(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| /* 1-bit integers use a 0/-1 convention */ |
| const int1_t src0 = -(int1_t)_src[0][_i].b; |
| |
| int1_t dst = (src0 == 0) ? 0 : ((src0 > 0) ? 1 : -1); |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[_i].b = dst & 1; |
| |
| } |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int8_t src0 = |
| _src[0][_i].i8; |
| |
| int8_t dst = (src0 == 0) ? 0 : ((src0 > 0) ? 1 : -1); |
| |
| _dst_val[_i].i8 = dst; |
| |
| } |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int16_t src0 = |
| _src[0][_i].i16; |
| |
| int16_t dst = (src0 == 0) ? 0 : ((src0 > 0) ? 1 : -1); |
| |
| _dst_val[_i].i16 = dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int32_t src0 = |
| _src[0][_i].i32; |
| |
| int32_t dst = (src0 == 0) ? 0 : ((src0 > 0) ? 1 : -1); |
| |
| _dst_val[_i].i32 = dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int64_t src0 = |
| _src[0][_i].i64; |
| |
| int64_t dst = (src0 == 0) ? 0 : ((src0 > 0) ? 1 : -1); |
| |
| _dst_val[_i].i64 = dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_isub(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| /* 1-bit integers use a 0/-1 convention */ |
| const int1_t src0 = -(int1_t)_src[0][_i].b; |
| /* 1-bit integers use a 0/-1 convention */ |
| const int1_t src1 = -(int1_t)_src[1][_i].b; |
| |
| int1_t dst = src0 - src1; |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[_i].b = dst & 1; |
| |
| } |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int8_t src0 = |
| _src[0][_i].i8; |
| const int8_t src1 = |
| _src[1][_i].i8; |
| |
| int8_t dst = src0 - src1; |
| |
| _dst_val[_i].i8 = dst; |
| |
| } |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int16_t src0 = |
| _src[0][_i].i16; |
| const int16_t src1 = |
| _src[1][_i].i16; |
| |
| int16_t dst = src0 - src1; |
| |
| _dst_val[_i].i16 = dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int32_t src0 = |
| _src[0][_i].i32; |
| const int32_t src1 = |
| _src[1][_i].i32; |
| |
| int32_t dst = src0 - src1; |
| |
| _dst_val[_i].i32 = dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int64_t src0 = |
| _src[0][_i].i64; |
| const int64_t src1 = |
| _src[1][_i].i64; |
| |
| int64_t dst = src0 - src1; |
| |
| _dst_val[_i].i64 = dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
/* nir_op_isub_sat: per-component saturating signed subtraction.
 * When src0 - src1 would exceed the signed range, the result clamps to
 * the type's maximum (src1 negative, i.e. the difference grew) or
 * minimum (src1 non-negative, i.e. the difference shrank).  Overflow is
 * detected by comparing the raw difference against src0.
 *
 * NOTE(review): the raw `src0 - src1` is evaluated in the narrow signed
 * type for the 32- and 64-bit cases, where it can itself overflow
 * (undefined behavior in C) before the comparison runs — presumably the
 * generator relies on two's-complement wrap here; verify against the
 * opcode generator.  The 8/16-bit cases are safe via integer promotion.
 */
static void
evaluate_isub_sat(nir_const_value *_dst_val,
                  UNUSED unsigned num_components,
                  unsigned bit_size,
                  UNUSED nir_const_value **_src,
                  UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 1: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         /* 1-bit integers use a 0/-1 convention */
         const int1_t src0 = -(int1_t)_src[0][_i].b;
         /* 1-bit integers use a 0/-1 convention */
         const int1_t src1 = -(int1_t)_src[1][_i].b;

         /* Clamp to INT_MAX of the type when subtracting a negative
          * value overflows upward, and to INT_MIN when subtracting a
          * non-negative value overflows downward. */
         int1_t dst =
      src1 < 0 ?
         (src0 - src1 < src0 ? (1ull << (bit_size - 1)) - 1 : src0 - src1) :
         (src0 < src0 - src1 ? (1ull << (bit_size - 1)) : src0 - src1)
;

         /* 1-bit integers get truncated */
         _dst_val[_i].b = dst & 1;

      }

      break;
   }
   case 8: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const int8_t src0 =
            _src[0][_i].i8;
         const int8_t src1 =
            _src[1][_i].i8;

         int8_t dst =
      src1 < 0 ?
         (src0 - src1 < src0 ? (1ull << (bit_size - 1)) - 1 : src0 - src1) :
         (src0 < src0 - src1 ? (1ull << (bit_size - 1)) : src0 - src1)
;

         _dst_val[_i].i8 = dst;

      }

      break;
   }
   case 16: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const int16_t src0 =
            _src[0][_i].i16;
         const int16_t src1 =
            _src[1][_i].i16;

         int16_t dst =
      src1 < 0 ?
         (src0 - src1 < src0 ? (1ull << (bit_size - 1)) - 1 : src0 - src1) :
         (src0 < src0 - src1 ? (1ull << (bit_size - 1)) : src0 - src1)
;

         _dst_val[_i].i16 = dst;

      }

      break;
   }
   case 32: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const int32_t src0 =
            _src[0][_i].i32;
         const int32_t src1 =
            _src[1][_i].i32;

         int32_t dst =
      src1 < 0 ?
         (src0 - src1 < src0 ? (1ull << (bit_size - 1)) - 1 : src0 - src1) :
         (src0 < src0 - src1 ? (1ull << (bit_size - 1)) : src0 - src1)
;

         _dst_val[_i].i32 = dst;

      }

      break;
   }
   case 64: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const int64_t src0 =
            _src[0][_i].i64;
         const int64_t src1 =
            _src[1][_i].i64;

         int64_t dst =
      src1 < 0 ?
         (src0 - src1 < src0 ? (1ull << (bit_size - 1)) - 1 : src0 - src1) :
         (src0 < src0 - src1 ? (1ull << (bit_size - 1)) : src0 - src1)
;

         _dst_val[_i].i64 = dst;

      }

      break;
   }

   default:
      unreachable("unknown bit width");
   }
}
| static void |
| evaluate_ixor(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint1_t src0 = |
| _src[0][_i].b; |
| const uint1_t src1 = |
| _src[1][_i].b; |
| |
| uint1_t dst = src0 ^ src1; |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[_i].b = dst & 1; |
| |
| } |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint8_t src0 = |
| _src[0][_i].u8; |
| const uint8_t src1 = |
| _src[1][_i].u8; |
| |
| uint8_t dst = src0 ^ src1; |
| |
| _dst_val[_i].u8 = dst; |
| |
| } |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint16_t src0 = |
| _src[0][_i].u16; |
| const uint16_t src1 = |
| _src[1][_i].u16; |
| |
| uint16_t dst = src0 ^ src1; |
| |
| _dst_val[_i].u16 = dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint32_t src0 = |
| _src[0][_i].u32; |
| const uint32_t src1 = |
| _src[1][_i].u32; |
| |
| uint32_t dst = src0 ^ src1; |
| |
| _dst_val[_i].u32 = dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint64_t src0 = |
| _src[0][_i].u64; |
| const uint64_t src1 = |
| _src[1][_i].u64; |
| |
| uint64_t dst = src0 ^ src1; |
| |
| _dst_val[_i].u64 = dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
/* nir_op_ldexp: per-component dst = src0 * 2^src1 via the C ldexp family.
 * Denormal results are flushed to a signed zero before storing.  The
 * 16-bit case widens src0 to float, computes in single precision, and
 * converts back to half using the shader's rounding mode (RTZ or RTNE).
 *
 * NOTE(review): `(bit_size == 64)` is a constant within each case, so
 * only one arm of the ldexp/ldexpf ternary is live per case; the 64-bit
 * flush uses copysignf (float) on a double source — presumably fine for
 * the sign transfer, but worth confirming against the generator.
 */
static void
evaluate_ldexp(nir_const_value *_dst_val,
               UNUSED unsigned num_components,
               unsigned bit_size,
               UNUSED nir_const_value **_src,
               UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 16: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const float src0 =
            _mesa_half_to_float(_src[0][_i].u16);
         const int32_t src1 =
            _src[1][_i].i32;

         float16_t dst;

dst = (bit_size == 64) ? ldexp(src0, src1) : ldexpf(src0, src1);
/* flush denormals to zero. */
if (!isnormal(dst))
   dst = copysignf(0.0f, src0);

         /* Store as half float, honoring the shader's rounding mode. */
         if (nir_is_rounding_mode_rtz(execution_mode, 16)) {
            _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst);
         } else {
            _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst);
         }

         if (nir_is_denorm_flush_to_zero(execution_mode, 16)) {
            constant_denorm_flush_to_zero(&_dst_val[_i], 16);
         }
      }

      break;
   }
   case 32: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const float32_t src0 =
            _src[0][_i].f32;
         const int32_t src1 =
            _src[1][_i].i32;

         float32_t dst;

dst = (bit_size == 64) ? ldexp(src0, src1) : ldexpf(src0, src1);
/* flush denormals to zero. */
if (!isnormal(dst))
   dst = copysignf(0.0f, src0);

         _dst_val[_i].f32 = dst;

         if (nir_is_denorm_flush_to_zero(execution_mode, 32)) {
            constant_denorm_flush_to_zero(&_dst_val[_i], 32);
         }
      }

      break;
   }
   case 64: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const float64_t src0 =
            _src[0][_i].f64;
         const int32_t src1 =
            _src[1][_i].i32;

         float64_t dst;

dst = (bit_size == 64) ? ldexp(src0, src1) : ldexpf(src0, src1);
/* flush denormals to zero. */
if (!isnormal(dst))
   dst = copysignf(0.0f, src0);

         _dst_val[_i].f64 = dst;

         if (nir_is_denorm_flush_to_zero(execution_mode, 64)) {
            constant_denorm_flush_to_zero(&_dst_val[_i], 64);
         }
      }

      break;
   }

   default:
      unreachable("unknown bit width");
   }
}
| static void |
| evaluate_mov(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint1_t src0 = |
| _src[0][_i].b; |
| |
| uint1_t dst = src0; |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[_i].b = dst & 1; |
| |
| } |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint8_t src0 = |
| _src[0][_i].u8; |
| |
| uint8_t dst = src0; |
| |
| _dst_val[_i].u8 = dst; |
| |
| } |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint16_t src0 = |
| _src[0][_i].u16; |
| |
| uint16_t dst = src0; |
| |
| _dst_val[_i].u16 = dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint32_t src0 = |
| _src[0][_i].u32; |
| |
| uint32_t dst = src0; |
| |
| _dst_val[_i].u32 = dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint64_t src0 = |
| _src[0][_i].u64; |
| |
| uint64_t dst = src0; |
| |
| _dst_val[_i].u64 = dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_pack_32_2x16(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| UNUSED unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| |
| |
| |
| |
| const struct uint16_vec src0 = { |
| _src[0][0].u16, |
| _src[0][1].u16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct uint32_vec dst; |
| |
| dst.x = src0.x | ((uint32_t)src0.y << 16); |
| |
| _dst_val[0].u32 = dst.x; |
| |
| |
| } |
| static void |
| evaluate_pack_32_2x16_split(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| UNUSED unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint16_t src0 = |
| _src[0][_i].u16; |
| const uint16_t src1 = |
| _src[1][_i].u16; |
| |
| uint32_t dst = src0 | ((uint32_t)src1 << 16); |
| |
| _dst_val[_i].u32 = dst; |
| |
| } |
| |
| } |
| static void |
| evaluate_pack_32_4x8(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| UNUSED unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| |
| |
| |
| |
| const struct uint8_vec src0 = { |
| _src[0][0].u8, |
| _src[0][1].u8, |
| _src[0][2].u8, |
| _src[0][3].u8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct uint32_vec dst; |
| |
| dst.x = src0.x | ((uint32_t)src0.y << 8) | ((uint32_t)src0.z << 16) | ((uint32_t)src0.w << 24); |
| |
| _dst_val[0].u32 = dst.x; |
| |
| |
| } |
| static void |
| evaluate_pack_64_2x32(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| UNUSED unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| |
| |
| |
| |
| const struct uint32_vec src0 = { |
| _src[0][0].u32, |
| _src[0][1].u32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct uint64_vec dst; |
| |
| dst.x = src0.x | ((uint64_t)src0.y << 32); |
| |
| _dst_val[0].u64 = dst.x; |
| |
| |
| } |
| static void |
| evaluate_pack_64_2x32_split(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| UNUSED unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint32_t src0 = |
| _src[0][_i].u32; |
| const uint32_t src1 = |
| _src[1][_i].u32; |
| |
| uint64_t dst = src0 | ((uint64_t)src1 << 32); |
| |
| _dst_val[_i].u64 = dst; |
| |
| } |
| |
| } |
| static void |
| evaluate_pack_64_4x16(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| UNUSED unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| |
| |
| |
| |
| const struct uint16_vec src0 = { |
| _src[0][0].u16, |
| _src[0][1].u16, |
| _src[0][2].u16, |
| _src[0][3].u16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct uint64_vec dst; |
| |
| dst.x = src0.x | ((uint64_t)src0.y << 16) | ((uint64_t)src0.z << 32) | ((uint64_t)src0.w << 48); |
| |
| _dst_val[0].u64 = dst.x; |
| |
| |
| } |
| static void |
| evaluate_pack_half_2x16(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| UNUSED unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct uint32_vec dst; |
| |
| |
| dst.x = (uint32_t) pack_half_1x16(src0.x); |
| dst.x |= ((uint32_t) pack_half_1x16(src0.y)) << 16; |
| |
| |
| _dst_val[0].u32 = dst.x; |
| |
| |
| } |
| static void |
| evaluate_pack_half_2x16_split(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| UNUSED unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct float32_vec src1 = { |
| _src[1][0].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct uint32_vec dst; |
| |
| dst.x = dst.y = dst.z = dst.w = pack_half_1x16(src0.x) | (pack_half_1x16(src1.x) << 16); |
| |
| _dst_val[0].u32 = dst.x; |
| |
| |
| } |
| static void |
| evaluate_pack_snorm_2x16(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| UNUSED unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct uint32_vec dst; |
| |
| |
| dst.x = (uint32_t) pack_snorm_1x16(src0.x); |
| dst.x |= ((uint32_t) pack_snorm_1x16(src0.y)) << 16; |
| |
| |
| _dst_val[0].u32 = dst.x; |
| |
| |
| } |
| static void |
| evaluate_pack_snorm_4x8(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| UNUSED unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| _src[0][2].f32, |
| _src[0][3].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct uint32_vec dst; |
| |
| |
| dst.x = (uint32_t) pack_snorm_1x8(src0.x); |
| dst.x |= ((uint32_t) pack_snorm_1x8(src0.y)) << 8; |
| dst.x |= ((uint32_t) pack_snorm_1x8(src0.z)) << 16; |
| dst.x |= ((uint32_t) pack_snorm_1x8(src0.w)) << 24; |
| |
| |
| _dst_val[0].u32 = dst.x; |
| |
| |
| } |
| static void |
| evaluate_pack_unorm_2x16(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| UNUSED unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct uint32_vec dst; |
| |
| |
| dst.x = (uint32_t) pack_unorm_1x16(src0.x); |
| dst.x |= ((uint32_t) pack_unorm_1x16(src0.y)) << 16; |
| |
| |
| _dst_val[0].u32 = dst.x; |
| |
| |
| } |
| static void |
| evaluate_pack_unorm_4x8(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| UNUSED unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| |
| |
| |
| |
| const struct float32_vec src0 = { |
| _src[0][0].f32, |
| _src[0][1].f32, |
| _src[0][2].f32, |
| _src[0][3].f32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct uint32_vec dst; |
| |
| |
| dst.x = (uint32_t) pack_unorm_1x8(src0.x); |
| dst.x |= ((uint32_t) pack_unorm_1x8(src0.y)) << 8; |
| dst.x |= ((uint32_t) pack_unorm_1x8(src0.z)) << 16; |
| dst.x |= ((uint32_t) pack_unorm_1x8(src0.w)) << 24; |
| |
| |
| _dst_val[0].u32 = dst.x; |
| |
| |
| } |
| static void |
| evaluate_pack_uvec2_to_uint(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| UNUSED unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| |
| |
| |
| |
| const struct uint32_vec src0 = { |
| _src[0][0].u32, |
| _src[0][1].u32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct uint32_vec dst; |
| |
| |
| dst.x = (src0.x & 0xffff) | (src0.y << 16); |
| |
| |
| _dst_val[0].u32 = dst.x; |
| |
| |
| } |
| static void |
| evaluate_pack_uvec4_to_uint(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| UNUSED unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| |
| |
| |
| |
| const struct uint32_vec src0 = { |
| _src[0][0].u32, |
| _src[0][1].u32, |
| _src[0][2].u32, |
| _src[0][3].u32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct uint32_vec dst; |
| |
| |
| dst.x = (src0.x << 0) | |
| (src0.y << 8) | |
| (src0.z << 16) | |
| (src0.w << 24); |
| |
| |
| _dst_val[0].u32 = dst.x; |
| |
| |
| } |
| static void |
| evaluate_seq(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| UNUSED unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| const float32_t src1 = |
| _src[1][_i].f32; |
| |
| float32_t dst = (src0 == src1) ? 1.0f : 0.0f; |
| |
| _dst_val[_i].f32 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 32); |
| } |
| } |
| |
| } |
| static void |
| evaluate_sge(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float src0 = |
| _mesa_half_to_float(_src[0][_i].u16); |
| const float src1 = |
| _mesa_half_to_float(_src[1][_i].u16); |
| |
| float16_t dst = (src0 >= src1) ? 1.0f : 0.0f; |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); |
| } else { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 16); |
| } |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| const float32_t src1 = |
| _src[1][_i].f32; |
| |
| float32_t dst = (src0 >= src1) ? 1.0f : 0.0f; |
| |
| _dst_val[_i].f32 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 32); |
| } |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float64_t src0 = |
| _src[0][_i].f64; |
| const float64_t src1 = |
| _src[1][_i].f64; |
| |
| float64_t dst = (src0 >= src1) ? 1.0f : 0.0f; |
| |
| _dst_val[_i].f64 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 64); |
| } |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_slt(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| UNUSED unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| const float32_t src1 = |
| _src[1][_i].f32; |
| |
| float32_t dst = (src0 < src1) ? 1.0f : 0.0f; |
| |
| _dst_val[_i].f32 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 32); |
| } |
| } |
| |
| } |
| static void |
| evaluate_sne(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| UNUSED unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const float32_t src0 = |
| _src[0][_i].f32; |
| const float32_t src1 = |
| _src[1][_i].f32; |
| |
| float32_t dst = (src0 != src1) ? 1.0f : 0.0f; |
| |
| _dst_val[_i].f32 = dst; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 32); |
| } |
| } |
| |
| } |
/* Constant-folds nir_op_u2f16: converts an unsigned integer source of the
 * given bit_size (1, 8, 16, 32 or 64) to a 16-bit float.  The intermediate
 * conversion goes through float16_t (a host float type — presumably 'float';
 * confirm against util/half_float.h), then is packed to half precision with
 * RTZ or RTNE rounding per execution_mode, and finally denorm-flushed to
 * zero when the shader execution mode requests it. */
static void
evaluate_u2f16(nir_const_value *_dst_val,
                 UNUSED unsigned num_components,
                 unsigned bit_size,
                 UNUSED nir_const_value **_src,
                 UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 1: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint1_t src0 =
                  _src[0][_i].b;

            float16_t dst = src0;

            if (nir_is_rounding_mode_rtz(execution_mode, 16)) {
               _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst);
            } else {
               _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst);
            }

            if (nir_is_denorm_flush_to_zero(execution_mode, 16)) {
               constant_denorm_flush_to_zero(&_dst_val[_i], 16);
            }
      }

      break;
   }
   case 8: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint8_t src0 =
                  _src[0][_i].u8;

            float16_t dst = src0;

            if (nir_is_rounding_mode_rtz(execution_mode, 16)) {
               _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst);
            } else {
               _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst);
            }

            if (nir_is_denorm_flush_to_zero(execution_mode, 16)) {
               constant_denorm_flush_to_zero(&_dst_val[_i], 16);
            }
      }

      break;
   }
   case 16: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint16_t src0 =
                  _src[0][_i].u16;

            float16_t dst = src0;

            if (nir_is_rounding_mode_rtz(execution_mode, 16)) {
               _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst);
            } else {
               _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst);
            }

            if (nir_is_denorm_flush_to_zero(execution_mode, 16)) {
               constant_denorm_flush_to_zero(&_dst_val[_i], 16);
            }
      }

      break;
   }
   case 32: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint32_t src0 =
                  _src[0][_i].u32;

            float16_t dst = src0;

            if (nir_is_rounding_mode_rtz(execution_mode, 16)) {
               _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst);
            } else {
               _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst);
            }

            if (nir_is_denorm_flush_to_zero(execution_mode, 16)) {
               constant_denorm_flush_to_zero(&_dst_val[_i], 16);
            }
      }

      break;
   }
   case 64: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint64_t src0 =
                  _src[0][_i].u64;

            float16_t dst = src0;

            if (nir_is_rounding_mode_rtz(execution_mode, 16)) {
               _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst);
            } else {
               _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst);
            }

            if (nir_is_denorm_flush_to_zero(execution_mode, 16)) {
               constant_denorm_flush_to_zero(&_dst_val[_i], 16);
            }
      }

      break;
   }

   default:
      unreachable("unknown bit width");
   }
}
/* Constant-folds nir_op_u2f32: converts an unsigned integer source of the
 * given bit_size (1, 8, 16, 32 or 64) to a 32-bit float using the host's
 * implicit integer-to-float conversion (round-to-nearest-even for values
 * that are not exactly representable).  The result is optionally flushed
 * to zero when it is a denorm and execution_mode asks for it. */
static void
evaluate_u2f32(nir_const_value *_dst_val,
                 UNUSED unsigned num_components,
                 unsigned bit_size,
                 UNUSED nir_const_value **_src,
                 UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 1: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint1_t src0 =
                  _src[0][_i].b;

            float32_t dst = src0;

            _dst_val[_i].f32 = dst;

            if (nir_is_denorm_flush_to_zero(execution_mode, 32)) {
               constant_denorm_flush_to_zero(&_dst_val[_i], 32);
            }
      }

      break;
   }
   case 8: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint8_t src0 =
                  _src[0][_i].u8;

            float32_t dst = src0;

            _dst_val[_i].f32 = dst;

            if (nir_is_denorm_flush_to_zero(execution_mode, 32)) {
               constant_denorm_flush_to_zero(&_dst_val[_i], 32);
            }
      }

      break;
   }
   case 16: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint16_t src0 =
                  _src[0][_i].u16;

            float32_t dst = src0;

            _dst_val[_i].f32 = dst;

            if (nir_is_denorm_flush_to_zero(execution_mode, 32)) {
               constant_denorm_flush_to_zero(&_dst_val[_i], 32);
            }
      }

      break;
   }
   case 32: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint32_t src0 =
                  _src[0][_i].u32;

            float32_t dst = src0;

            _dst_val[_i].f32 = dst;

            if (nir_is_denorm_flush_to_zero(execution_mode, 32)) {
               constant_denorm_flush_to_zero(&_dst_val[_i], 32);
            }
      }

      break;
   }
   case 64: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint64_t src0 =
                  _src[0][_i].u64;

            float32_t dst = src0;

            _dst_val[_i].f32 = dst;

            if (nir_is_denorm_flush_to_zero(execution_mode, 32)) {
               constant_denorm_flush_to_zero(&_dst_val[_i], 32);
            }
      }

      break;
   }

   default:
      unreachable("unknown bit width");
   }
}
/* Constant-folds nir_op_u2f64: converts an unsigned integer source of the
 * given bit_size (1, 8, 16, 32 or 64) to a 64-bit float.  Only 64-bit
 * sources can be inexact; those round per the host's implicit conversion.
 * The result is optionally denorm-flushed to zero per execution_mode. */
static void
evaluate_u2f64(nir_const_value *_dst_val,
                 UNUSED unsigned num_components,
                 unsigned bit_size,
                 UNUSED nir_const_value **_src,
                 UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 1: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint1_t src0 =
                  _src[0][_i].b;

            float64_t dst = src0;

            _dst_val[_i].f64 = dst;

            if (nir_is_denorm_flush_to_zero(execution_mode, 64)) {
               constant_denorm_flush_to_zero(&_dst_val[_i], 64);
            }
      }

      break;
   }
   case 8: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint8_t src0 =
                  _src[0][_i].u8;

            float64_t dst = src0;

            _dst_val[_i].f64 = dst;

            if (nir_is_denorm_flush_to_zero(execution_mode, 64)) {
               constant_denorm_flush_to_zero(&_dst_val[_i], 64);
            }
      }

      break;
   }
   case 16: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint16_t src0 =
                  _src[0][_i].u16;

            float64_t dst = src0;

            _dst_val[_i].f64 = dst;

            if (nir_is_denorm_flush_to_zero(execution_mode, 64)) {
               constant_denorm_flush_to_zero(&_dst_val[_i], 64);
            }
      }

      break;
   }
   case 32: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint32_t src0 =
                  _src[0][_i].u32;

            float64_t dst = src0;

            _dst_val[_i].f64 = dst;

            if (nir_is_denorm_flush_to_zero(execution_mode, 64)) {
               constant_denorm_flush_to_zero(&_dst_val[_i], 64);
            }
      }

      break;
   }
   case 64: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint64_t src0 =
                  _src[0][_i].u64;

            float64_t dst = src0;

            _dst_val[_i].f64 = dst;

            if (nir_is_denorm_flush_to_zero(execution_mode, 64)) {
               constant_denorm_flush_to_zero(&_dst_val[_i], 64);
            }
      }

      break;
   }

   default:
      unreachable("unknown bit width");
   }
}
| static void |
| evaluate_u2fmp(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| UNUSED unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint32_t src0 = |
| _src[0][_i].u32; |
| |
| float16_t dst = src0; |
| |
| if (nir_is_rounding_mode_rtz(execution_mode, 16)) { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); |
| } else { |
| _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); |
| } |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { |
| constant_denorm_flush_to_zero(&_dst_val[_i], 16); |
| } |
| } |
| |
| } |
/* Constant-folds nir_op_u2u1: resizes an unsigned source of the given
 * bit_size down to a 1-bit value by plain truncation (only the low bit is
 * kept); the packed boolean slot stores that single bit. */
static void
evaluate_u2u1(nir_const_value *_dst_val,
                 UNUSED unsigned num_components,
                 unsigned bit_size,
                 UNUSED nir_const_value **_src,
                 UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 1: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint1_t src0 =
                  _src[0][_i].b;

            uint1_t dst = src0;

            /* 1-bit integers get truncated */
            _dst_val[_i].b = dst & 1;

      }

      break;
   }
   case 8: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint8_t src0 =
                  _src[0][_i].u8;

            uint1_t dst = src0;

            /* 1-bit integers get truncated */
            _dst_val[_i].b = dst & 1;

      }

      break;
   }
   case 16: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint16_t src0 =
                  _src[0][_i].u16;

            uint1_t dst = src0;

            /* 1-bit integers get truncated */
            _dst_val[_i].b = dst & 1;

      }

      break;
   }
   case 32: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint32_t src0 =
                  _src[0][_i].u32;

            uint1_t dst = src0;

            /* 1-bit integers get truncated */
            _dst_val[_i].b = dst & 1;

      }

      break;
   }
   case 64: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint64_t src0 =
                  _src[0][_i].u64;

            uint1_t dst = src0;

            /* 1-bit integers get truncated */
            _dst_val[_i].b = dst & 1;

      }

      break;
   }

   default:
      unreachable("unknown bit width");
   }
}
/* Constant-folds nir_op_u2u16: resizes an unsigned source of the given
 * bit_size to 16 bits — zero-extending narrower sources and truncating
 * wider ones (standard C unsigned conversion semantics). */
static void
evaluate_u2u16(nir_const_value *_dst_val,
                 UNUSED unsigned num_components,
                 unsigned bit_size,
                 UNUSED nir_const_value **_src,
                 UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 1: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint1_t src0 =
                  _src[0][_i].b;

            uint16_t dst = src0;

            _dst_val[_i].u16 = dst;

      }

      break;
   }
   case 8: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint8_t src0 =
                  _src[0][_i].u8;

            uint16_t dst = src0;

            _dst_val[_i].u16 = dst;

      }

      break;
   }
   case 16: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint16_t src0 =
                  _src[0][_i].u16;

            uint16_t dst = src0;

            _dst_val[_i].u16 = dst;

      }

      break;
   }
   case 32: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint32_t src0 =
                  _src[0][_i].u32;

            uint16_t dst = src0;

            _dst_val[_i].u16 = dst;

      }

      break;
   }
   case 64: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint64_t src0 =
                  _src[0][_i].u64;

            uint16_t dst = src0;

            _dst_val[_i].u16 = dst;

      }

      break;
   }

   default:
      unreachable("unknown bit width");
   }
}
/* Constant-folds nir_op_u2u32: resizes an unsigned source of the given
 * bit_size to 32 bits — zero-extending narrower sources and truncating
 * 64-bit sources (standard C unsigned conversion semantics). */
static void
evaluate_u2u32(nir_const_value *_dst_val,
                 UNUSED unsigned num_components,
                 unsigned bit_size,
                 UNUSED nir_const_value **_src,
                 UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 1: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint1_t src0 =
                  _src[0][_i].b;

            uint32_t dst = src0;

            _dst_val[_i].u32 = dst;

      }

      break;
   }
   case 8: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint8_t src0 =
                  _src[0][_i].u8;

            uint32_t dst = src0;

            _dst_val[_i].u32 = dst;

      }

      break;
   }
   case 16: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint16_t src0 =
                  _src[0][_i].u16;

            uint32_t dst = src0;

            _dst_val[_i].u32 = dst;

      }

      break;
   }
   case 32: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint32_t src0 =
                  _src[0][_i].u32;

            uint32_t dst = src0;

            _dst_val[_i].u32 = dst;

      }

      break;
   }
   case 64: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint64_t src0 =
                  _src[0][_i].u64;

            uint32_t dst = src0;

            _dst_val[_i].u32 = dst;

      }

      break;
   }

   default:
      unreachable("unknown bit width");
   }
}
/* Constant-folds nir_op_u2u64: resizes an unsigned source of the given
 * bit_size to 64 bits by zero extension (no source is ever truncated). */
static void
evaluate_u2u64(nir_const_value *_dst_val,
                 UNUSED unsigned num_components,
                 unsigned bit_size,
                 UNUSED nir_const_value **_src,
                 UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 1: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint1_t src0 =
                  _src[0][_i].b;

            uint64_t dst = src0;

            _dst_val[_i].u64 = dst;

      }

      break;
   }
   case 8: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint8_t src0 =
                  _src[0][_i].u8;

            uint64_t dst = src0;

            _dst_val[_i].u64 = dst;

      }

      break;
   }
   case 16: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint16_t src0 =
                  _src[0][_i].u16;

            uint64_t dst = src0;

            _dst_val[_i].u64 = dst;

      }

      break;
   }
   case 32: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint32_t src0 =
                  _src[0][_i].u32;

            uint64_t dst = src0;

            _dst_val[_i].u64 = dst;

      }

      break;
   }
   case 64: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint64_t src0 =
                  _src[0][_i].u64;

            uint64_t dst = src0;

            _dst_val[_i].u64 = dst;

      }

      break;
   }

   default:
      unreachable("unknown bit width");
   }
}
/* Constant-folds nir_op_u2u8: resizes an unsigned source of the given
 * bit_size to 8 bits — zero-extending the 1-bit case and truncating wider
 * sources (standard C unsigned conversion semantics). */
static void
evaluate_u2u8(nir_const_value *_dst_val,
                 UNUSED unsigned num_components,
                 unsigned bit_size,
                 UNUSED nir_const_value **_src,
                 UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 1: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint1_t src0 =
                  _src[0][_i].b;

            uint8_t dst = src0;

            _dst_val[_i].u8 = dst;

      }

      break;
   }
   case 8: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint8_t src0 =
                  _src[0][_i].u8;

            uint8_t dst = src0;

            _dst_val[_i].u8 = dst;

      }

      break;
   }
   case 16: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint16_t src0 =
                  _src[0][_i].u16;

            uint8_t dst = src0;

            _dst_val[_i].u8 = dst;

      }

      break;
   }
   case 32: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint32_t src0 =
                  _src[0][_i].u32;

            uint8_t dst = src0;

            _dst_val[_i].u8 = dst;

      }

      break;
   }
   case 64: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint64_t src0 =
                  _src[0][_i].u64;

            uint8_t dst = src0;

            _dst_val[_i].u8 = dst;

      }

      break;
   }

   default:
      unreachable("unknown bit width");
   }
}
/* Constant-folds nir_op_uabs_isub: |src0 - src1| with *signed* sources and
 * an unsigned result.  The larger signed operand has the smaller subtracted
 * from it, with both widened to uint64_t first so the magnitude of e.g.
 * INT64_MIN is representable; the difference is then truncated to the
 * destination width.  1-bit sources use the NIR 0/-1 boolean convention. */
static void
evaluate_uabs_isub(nir_const_value *_dst_val,
                 UNUSED unsigned num_components,
                 unsigned bit_size,
                 UNUSED nir_const_value **_src,
                 UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 1: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            /* 1-bit integers use a 0/-1 convention */
            const int1_t src0 = -(int1_t)_src[0][_i].b;
            /* 1-bit integers use a 0/-1 convention */
            const int1_t src1 = -(int1_t)_src[1][_i].b;

            uint1_t dst =
      src1 > src0 ? (uint64_t) src1 - (uint64_t) src0
                  : (uint64_t) src0 - (uint64_t) src1
;

            /* 1-bit integers get truncated */
            _dst_val[_i].b = dst & 1;

      }

      break;
   }
   case 8: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const int8_t src0 =
                  _src[0][_i].i8;
            const int8_t src1 =
                  _src[1][_i].i8;

            uint8_t dst =
      src1 > src0 ? (uint64_t) src1 - (uint64_t) src0
                  : (uint64_t) src0 - (uint64_t) src1
;

            _dst_val[_i].u8 = dst;

      }

      break;
   }
   case 16: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const int16_t src0 =
                  _src[0][_i].i16;
            const int16_t src1 =
                  _src[1][_i].i16;

            uint16_t dst =
      src1 > src0 ? (uint64_t) src1 - (uint64_t) src0
                  : (uint64_t) src0 - (uint64_t) src1
;

            _dst_val[_i].u16 = dst;

      }

      break;
   }
   case 32: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const int32_t src0 =
                  _src[0][_i].i32;
            const int32_t src1 =
                  _src[1][_i].i32;

            uint32_t dst =
      src1 > src0 ? (uint64_t) src1 - (uint64_t) src0
                  : (uint64_t) src0 - (uint64_t) src1
;

            _dst_val[_i].u32 = dst;

      }

      break;
   }
   case 64: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const int64_t src0 =
                  _src[0][_i].i64;
            const int64_t src1 =
                  _src[1][_i].i64;

            uint64_t dst =
      src1 > src0 ? (uint64_t) src1 - (uint64_t) src0
                  : (uint64_t) src0 - (uint64_t) src1
;

            _dst_val[_i].u64 = dst;

      }

      break;
   }

   default:
      unreachable("unknown bit width");
   }
}
/* Constant-folds nir_op_uabs_usub: absolute difference of two unsigned
 * sources, computed as (larger - smaller) so the subtraction never wraps. */
static void
evaluate_uabs_usub(nir_const_value *_dst_val,
                 UNUSED unsigned num_components,
                 unsigned bit_size,
                 UNUSED nir_const_value **_src,
                 UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 1: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint1_t src0 =
                  _src[0][_i].b;
            const uint1_t src1 =
                  _src[1][_i].b;

            uint1_t dst = (src1 > src0) ? (src1 - src0) : (src0 - src1);

            /* 1-bit integers get truncated */
            _dst_val[_i].b = dst & 1;

      }

      break;
   }
   case 8: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint8_t src0 =
                  _src[0][_i].u8;
            const uint8_t src1 =
                  _src[1][_i].u8;

            uint8_t dst = (src1 > src0) ? (src1 - src0) : (src0 - src1);

            _dst_val[_i].u8 = dst;

      }

      break;
   }
   case 16: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint16_t src0 =
                  _src[0][_i].u16;
            const uint16_t src1 =
                  _src[1][_i].u16;

            uint16_t dst = (src1 > src0) ? (src1 - src0) : (src0 - src1);

            _dst_val[_i].u16 = dst;

      }

      break;
   }
   case 32: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint32_t src0 =
                  _src[0][_i].u32;
            const uint32_t src1 =
                  _src[1][_i].u32;

            uint32_t dst = (src1 > src0) ? (src1 - src0) : (src0 - src1);

            _dst_val[_i].u32 = dst;

      }

      break;
   }
   case 64: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint64_t src0 =
                  _src[0][_i].u64;
            const uint64_t src1 =
                  _src[1][_i].u64;

            uint64_t dst = (src1 > src0) ? (src1 - src0) : (src0 - src1);

            _dst_val[_i].u64 = dst;

      }

      break;
   }

   default:
      unreachable("unknown bit width");
   }
}
/* Constant-folds nir_op_uadd_carry: yields 1 when src0 + src1 overflows
 * the source width, else 0.  The wrap test `src0 + src1 < src0` relies on
 * unsigned modular arithmetic.  NOTE(review): for 8/16-bit sources the
 * addition happens after integer promotion to int, so the comparison tests
 * the unwrapped sum — presumably the generator intends carry at the full
 * promoted width here; confirm against nir_opcodes.py. */
static void
evaluate_uadd_carry(nir_const_value *_dst_val,
                 UNUSED unsigned num_components,
                 unsigned bit_size,
                 UNUSED nir_const_value **_src,
                 UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 1: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint1_t src0 =
                  _src[0][_i].b;
            const uint1_t src1 =
                  _src[1][_i].b;

            uint1_t dst = src0 + src1 < src0;

            /* 1-bit integers get truncated */
            _dst_val[_i].b = dst & 1;

      }

      break;
   }
   case 8: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint8_t src0 =
                  _src[0][_i].u8;
            const uint8_t src1 =
                  _src[1][_i].u8;

            uint8_t dst = src0 + src1 < src0;

            _dst_val[_i].u8 = dst;

      }

      break;
   }
   case 16: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint16_t src0 =
                  _src[0][_i].u16;
            const uint16_t src1 =
                  _src[1][_i].u16;

            uint16_t dst = src0 + src1 < src0;

            _dst_val[_i].u16 = dst;

      }

      break;
   }
   case 32: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint32_t src0 =
                  _src[0][_i].u32;
            const uint32_t src1 =
                  _src[1][_i].u32;

            uint32_t dst = src0 + src1 < src0;

            _dst_val[_i].u32 = dst;

      }

      break;
   }
   case 64: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint64_t src0 =
                  _src[0][_i].u64;
            const uint64_t src1 =
                  _src[1][_i].u64;

            uint64_t dst = src0 + src1 < src0;

            _dst_val[_i].u64 = dst;

      }

      break;
   }

   default:
      unreachable("unknown bit width");
   }
}
/* Constant-folds nir_op_uadd_sat: unsigned saturating add — on wrap the
 * result clamps to the all-ones value of the source's *storage* type
 * (MAX_UINT_FOR_SIZE(sizeof(src0) * 8)).  NOTE(review): for the 1-bit case
 * that saturation value is based on uint1_t's storage width, not 1 bit;
 * the final `& 1` truncation makes the stored result correct regardless.
 * As with uadd_carry, sub-int widths are subject to integer promotion in
 * the wrap test. */
static void
evaluate_uadd_sat(nir_const_value *_dst_val,
                 UNUSED unsigned num_components,
                 unsigned bit_size,
                 UNUSED nir_const_value **_src,
                 UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 1: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint1_t src0 =
                  _src[0][_i].b;
            const uint1_t src1 =
                  _src[1][_i].b;

            uint1_t dst = (src0 + src1) < src0 ? MAX_UINT_FOR_SIZE(sizeof(src0) * 8) : (src0 + src1);

            /* 1-bit integers get truncated */
            _dst_val[_i].b = dst & 1;

      }

      break;
   }
   case 8: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint8_t src0 =
                  _src[0][_i].u8;
            const uint8_t src1 =
                  _src[1][_i].u8;

            uint8_t dst = (src0 + src1) < src0 ? MAX_UINT_FOR_SIZE(sizeof(src0) * 8) : (src0 + src1);

            _dst_val[_i].u8 = dst;

      }

      break;
   }
   case 16: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint16_t src0 =
                  _src[0][_i].u16;
            const uint16_t src1 =
                  _src[1][_i].u16;

            uint16_t dst = (src0 + src1) < src0 ? MAX_UINT_FOR_SIZE(sizeof(src0) * 8) : (src0 + src1);

            _dst_val[_i].u16 = dst;

      }

      break;
   }
   case 32: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint32_t src0 =
                  _src[0][_i].u32;
            const uint32_t src1 =
                  _src[1][_i].u32;

            uint32_t dst = (src0 + src1) < src0 ? MAX_UINT_FOR_SIZE(sizeof(src0) * 8) : (src0 + src1);

            _dst_val[_i].u32 = dst;

      }

      break;
   }
   case 64: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint64_t src0 =
                  _src[0][_i].u64;
            const uint64_t src1 =
                  _src[1][_i].u64;

            uint64_t dst = (src0 + src1) < src0 ? MAX_UINT_FOR_SIZE(sizeof(src0) * 8) : (src0 + src1);

            _dst_val[_i].u64 = dst;

      }

      break;
   }

   default:
      unreachable("unknown bit width");
   }
}
| static void |
| evaluate_ubfe(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| UNUSED unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint32_t src0 = |
| _src[0][_i].u32; |
| const uint32_t src1 = |
| _src[1][_i].u32; |
| const uint32_t src2 = |
| _src[2][_i].u32; |
| |
| uint32_t dst; |
| |
| |
| unsigned base = src0; |
| unsigned offset = src1 & 0x1F; |
| unsigned bits = src2 & 0x1F; |
| if (bits == 0) { |
| dst = 0; |
| } else if (offset + bits < 32) { |
| dst = (base << (32 - bits - offset)) >> (32 - bits); |
| } else { |
| dst = base >> offset; |
| } |
| |
| |
| _dst_val[_i].u32 = dst; |
| |
| } |
| |
| } |
| static void |
| evaluate_ubitfield_extract(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| UNUSED unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint32_t src0 = |
| _src[0][_i].u32; |
| const int32_t src1 = |
| _src[1][_i].i32; |
| const int32_t src2 = |
| _src[2][_i].i32; |
| |
| uint32_t dst; |
| |
| |
| unsigned base = src0; |
| int offset = src1, bits = src2; |
| if (bits == 0) { |
| dst = 0; |
| } else if (bits < 0 || offset < 0 || offset + bits > 32) { |
| dst = 0; /* undefined per the spec */ |
| } else { |
| dst = (base >> offset) & ((1ull << bits) - 1); |
| } |
| |
| |
| _dst_val[_i].u32 = dst; |
| |
| } |
| |
| } |
| static void |
| evaluate_uclz(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| UNUSED unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint32_t src0 = |
| _src[0][_i].u32; |
| |
| uint32_t dst; |
| |
| |
| int bit; |
| for (bit = bit_size - 1; bit >= 0; bit--) { |
| if ((src0 & (1u << bit)) != 0) |
| break; |
| } |
| dst = (unsigned)(31 - bit); |
| |
| |
| _dst_val[_i].u32 = dst; |
| |
| } |
| |
| } |
/* Constant-folds nir_op_udiv: unsigned division with the GPU convention
 * that division by zero folds to 0 (checked explicitly, so the host never
 * executes a trapping divide). */
static void
evaluate_udiv(nir_const_value *_dst_val,
                 UNUSED unsigned num_components,
                 unsigned bit_size,
                 UNUSED nir_const_value **_src,
                 UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 1: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint1_t src0 =
                  _src[0][_i].b;
            const uint1_t src1 =
                  _src[1][_i].b;

            uint1_t dst = src1 == 0 ? 0 : (src0 / src1);

            /* 1-bit integers get truncated */
            _dst_val[_i].b = dst & 1;

      }

      break;
   }
   case 8: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint8_t src0 =
                  _src[0][_i].u8;
            const uint8_t src1 =
                  _src[1][_i].u8;

            uint8_t dst = src1 == 0 ? 0 : (src0 / src1);

            _dst_val[_i].u8 = dst;

      }

      break;
   }
   case 16: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint16_t src0 =
                  _src[0][_i].u16;
            const uint16_t src1 =
                  _src[1][_i].u16;

            uint16_t dst = src1 == 0 ? 0 : (src0 / src1);

            _dst_val[_i].u16 = dst;

      }

      break;
   }
   case 32: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint32_t src0 =
                  _src[0][_i].u32;
            const uint32_t src1 =
                  _src[1][_i].u32;

            uint32_t dst = src1 == 0 ? 0 : (src0 / src1);

            _dst_val[_i].u32 = dst;

      }

      break;
   }
   case 64: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint64_t src0 =
                  _src[0][_i].u64;
            const uint64_t src1 =
                  _src[1][_i].u64;

            uint64_t dst = src1 == 0 ? 0 : (src0 / src1);

            _dst_val[_i].u64 = dst;

      }

      break;
   }

   default:
      unreachable("unknown bit width");
   }
}
/* Constant-folds nir_op_ufind_msb: returns the bit index of the most
 * significant set bit of the unsigned source (scanning down from
 * bit_size - 1), or -1 when the source is zero.  The result is always a
 * 32-bit signed integer regardless of the source width. */
static void
evaluate_ufind_msb(nir_const_value *_dst_val,
                 UNUSED unsigned num_components,
                 unsigned bit_size,
                 UNUSED nir_const_value **_src,
                 UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 1: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint1_t src0 =
                  _src[0][_i].b;

            int32_t dst;


      dst = -1;
      for (int bit = bit_size - 1; bit >= 0; bit--) {
         if ((src0 >> bit) & 1) {
            dst = bit;
            break;
         }
      }


            _dst_val[_i].i32 = dst;

      }

      break;
   }
   case 8: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint8_t src0 =
                  _src[0][_i].u8;

            int32_t dst;


      dst = -1;
      for (int bit = bit_size - 1; bit >= 0; bit--) {
         if ((src0 >> bit) & 1) {
            dst = bit;
            break;
         }
      }


            _dst_val[_i].i32 = dst;

      }

      break;
   }
   case 16: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint16_t src0 =
                  _src[0][_i].u16;

            int32_t dst;


      dst = -1;
      for (int bit = bit_size - 1; bit >= 0; bit--) {
         if ((src0 >> bit) & 1) {
            dst = bit;
            break;
         }
      }


            _dst_val[_i].i32 = dst;

      }

      break;
   }
   case 32: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint32_t src0 =
                  _src[0][_i].u32;

            int32_t dst;


      dst = -1;
      for (int bit = bit_size - 1; bit >= 0; bit--) {
         if ((src0 >> bit) & 1) {
            dst = bit;
            break;
         }
      }


            _dst_val[_i].i32 = dst;

      }

      break;
   }
   case 64: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint64_t src0 =
                  _src[0][_i].u64;

            int32_t dst;


      dst = -1;
      for (int bit = bit_size - 1; bit >= 0; bit--) {
         if ((src0 >> bit) & 1) {
            dst = bit;
            break;
         }
      }


            _dst_val[_i].i32 = dst;

      }

      break;
   }

   default:
      unreachable("unknown bit width");
   }
}
/* Constant-folds nir_op_uge: unsigned >= comparison producing a 1-bit NIR
 * boolean (stored as 0 / -1 via the negation of the C comparison result). */
static void
evaluate_uge(nir_const_value *_dst_val,
                 UNUSED unsigned num_components,
                 unsigned bit_size,
                 UNUSED nir_const_value **_src,
                 UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 1: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint1_t src0 =
                  _src[0][_i].b;
            const uint1_t src1 =
                  _src[1][_i].b;

            bool1_t dst = src0 >= src1;

            _dst_val[_i].b = -(int)dst;

      }

      break;
   }
   case 8: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint8_t src0 =
                  _src[0][_i].u8;
            const uint8_t src1 =
                  _src[1][_i].u8;

            bool1_t dst = src0 >= src1;

            _dst_val[_i].b = -(int)dst;

      }

      break;
   }
   case 16: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint16_t src0 =
                  _src[0][_i].u16;
            const uint16_t src1 =
                  _src[1][_i].u16;

            bool1_t dst = src0 >= src1;

            _dst_val[_i].b = -(int)dst;

      }

      break;
   }
   case 32: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint32_t src0 =
                  _src[0][_i].u32;
            const uint32_t src1 =
                  _src[1][_i].u32;

            bool1_t dst = src0 >= src1;

            _dst_val[_i].b = -(int)dst;

      }

      break;
   }
   case 64: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint64_t src0 =
                  _src[0][_i].u64;
            const uint64_t src1 =
                  _src[1][_i].u64;

            bool1_t dst = src0 >= src1;

            _dst_val[_i].b = -(int)dst;

      }

      break;
   }

   default:
      unreachable("unknown bit width");
   }
}
/* Constant-folds nir_op_uge16: unsigned >= comparison producing a 16-bit
 * NIR boolean (0x0000 for false, 0xFFFF for true via integer negation). */
static void
evaluate_uge16(nir_const_value *_dst_val,
                 UNUSED unsigned num_components,
                 unsigned bit_size,
                 UNUSED nir_const_value **_src,
                 UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 1: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint1_t src0 =
                  _src[0][_i].b;
            const uint1_t src1 =
                  _src[1][_i].b;

            bool16_t dst = src0 >= src1;

            _dst_val[_i].i16 = -(int)dst;

      }

      break;
   }
   case 8: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint8_t src0 =
                  _src[0][_i].u8;
            const uint8_t src1 =
                  _src[1][_i].u8;

            bool16_t dst = src0 >= src1;

            _dst_val[_i].i16 = -(int)dst;

      }

      break;
   }
   case 16: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint16_t src0 =
                  _src[0][_i].u16;
            const uint16_t src1 =
                  _src[1][_i].u16;

            bool16_t dst = src0 >= src1;

            _dst_val[_i].i16 = -(int)dst;

      }

      break;
   }
   case 32: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint32_t src0 =
                  _src[0][_i].u32;
            const uint32_t src1 =
                  _src[1][_i].u32;

            bool16_t dst = src0 >= src1;

            _dst_val[_i].i16 = -(int)dst;

      }

      break;
   }
   case 64: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint64_t src0 =
                  _src[0][_i].u64;
            const uint64_t src1 =
                  _src[1][_i].u64;

            bool16_t dst = src0 >= src1;

            _dst_val[_i].i16 = -(int)dst;

      }

      break;
   }

   default:
      unreachable("unknown bit width");
   }
}
/* Constant-folds nir_op_uge32: unsigned >= comparison producing a 32-bit
 * NIR boolean (0 for false, 0xFFFFFFFF for true via integer negation). */
static void
evaluate_uge32(nir_const_value *_dst_val,
                 UNUSED unsigned num_components,
                 unsigned bit_size,
                 UNUSED nir_const_value **_src,
                 UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 1: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint1_t src0 =
                  _src[0][_i].b;
            const uint1_t src1 =
                  _src[1][_i].b;

            bool32_t dst = src0 >= src1;

            _dst_val[_i].i32 = -(int)dst;

      }

      break;
   }
   case 8: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint8_t src0 =
                  _src[0][_i].u8;
            const uint8_t src1 =
                  _src[1][_i].u8;

            bool32_t dst = src0 >= src1;

            _dst_val[_i].i32 = -(int)dst;

      }

      break;
   }
   case 16: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint16_t src0 =
                  _src[0][_i].u16;
            const uint16_t src1 =
                  _src[1][_i].u16;

            bool32_t dst = src0 >= src1;

            _dst_val[_i].i32 = -(int)dst;

      }

      break;
   }
   case 32: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint32_t src0 =
                  _src[0][_i].u32;
            const uint32_t src1 =
                  _src[1][_i].u32;

            bool32_t dst = src0 >= src1;

            _dst_val[_i].i32 = -(int)dst;

      }

      break;
   }
   case 64: {




      for (unsigned _i = 0; _i < num_components; _i++) {
            const uint64_t src0 =
                  _src[0][_i].u64;
            const uint64_t src1 =
                  _src[1][_i].u64;

            bool32_t dst = src0 >= src1;

            _dst_val[_i].i32 = -(int)dst;

      }

      break;
   }

   default:
      unreachable("unknown bit width");
   }
}
/* nir_op_uge8: per-component unsigned >= comparison for 1/8/16/32/64-bit
 * sources, producing an 8-bit NIR boolean (-(int)dst stores ~0 for true,
 * 0 for false in the i8 destination).
 */
static void
evaluate_uge8(nir_const_value *_dst_val,
UNUSED unsigned num_components,
unsigned bit_size,
UNUSED nir_const_value **_src,
UNUSED unsigned execution_mode)
{
switch (bit_size) {
case 1: {




for (unsigned _i = 0; _i < num_components; _i++) {
const uint1_t src0 =
_src[0][_i].b;
const uint1_t src1 =
_src[1][_i].b;

bool8_t dst = src0 >= src1;

_dst_val[_i].i8 = -(int)dst;

}

break;
}
case 8: {




for (unsigned _i = 0; _i < num_components; _i++) {
const uint8_t src0 =
_src[0][_i].u8;
const uint8_t src1 =
_src[1][_i].u8;

bool8_t dst = src0 >= src1;

_dst_val[_i].i8 = -(int)dst;

}

break;
}
case 16: {




for (unsigned _i = 0; _i < num_components; _i++) {
const uint16_t src0 =
_src[0][_i].u16;
const uint16_t src1 =
_src[1][_i].u16;

bool8_t dst = src0 >= src1;

_dst_val[_i].i8 = -(int)dst;

}

break;
}
case 32: {




for (unsigned _i = 0; _i < num_components; _i++) {
const uint32_t src0 =
_src[0][_i].u32;
const uint32_t src1 =
_src[1][_i].u32;

bool8_t dst = src0 >= src1;

_dst_val[_i].i8 = -(int)dst;

}

break;
}
case 64: {




for (unsigned _i = 0; _i < num_components; _i++) {
const uint64_t src0 =
_src[0][_i].u64;
const uint64_t src1 =
_src[1][_i].u64;

bool8_t dst = src0 >= src1;

_dst_val[_i].i8 = -(int)dst;

}

break;
}

default:
unreachable("unknown bit width");
}
}
/* nir_op_uhadd: per-component unsigned halving add, (src0 + src1) >> 1
 * computed without overflow via the identity
 * (a & b) + ((a ^ b) >> 1) == (a + b) / 2 for unsigned a, b.
 */
static void
evaluate_uhadd(nir_const_value *_dst_val,
UNUSED unsigned num_components,
unsigned bit_size,
UNUSED nir_const_value **_src,
UNUSED unsigned execution_mode)
{
switch (bit_size) {
case 1: {




for (unsigned _i = 0; _i < num_components; _i++) {
const uint1_t src0 =
_src[0][_i].b;
const uint1_t src1 =
_src[1][_i].b;

uint1_t dst = (src0 & src1) + ((src0 ^ src1) >> 1);

/* 1-bit integers get truncated */
_dst_val[_i].b = dst & 1;

}

break;
}
case 8: {




for (unsigned _i = 0; _i < num_components; _i++) {
const uint8_t src0 =
_src[0][_i].u8;
const uint8_t src1 =
_src[1][_i].u8;

uint8_t dst = (src0 & src1) + ((src0 ^ src1) >> 1);

_dst_val[_i].u8 = dst;

}

break;
}
case 16: {




for (unsigned _i = 0; _i < num_components; _i++) {
const uint16_t src0 =
_src[0][_i].u16;
const uint16_t src1 =
_src[1][_i].u16;

uint16_t dst = (src0 & src1) + ((src0 ^ src1) >> 1);

_dst_val[_i].u16 = dst;

}

break;
}
case 32: {




for (unsigned _i = 0; _i < num_components; _i++) {
const uint32_t src0 =
_src[0][_i].u32;
const uint32_t src1 =
_src[1][_i].u32;

uint32_t dst = (src0 & src1) + ((src0 ^ src1) >> 1);

_dst_val[_i].u32 = dst;

}

break;
}
case 64: {




for (unsigned _i = 0; _i < num_components; _i++) {
const uint64_t src0 =
_src[0][_i].u64;
const uint64_t src1 =
_src[1][_i].u64;

uint64_t dst = (src0 & src1) + ((src0 ^ src1) >> 1);

_dst_val[_i].u64 = dst;

}

break;
}

default:
unreachable("unknown bit width");
}
}
/* nir_op_ult: per-component unsigned < comparison for 1/8/16/32/64-bit
 * sources, producing a 1-bit NIR boolean stored in the .b destination
 * (-(int)dst yields ~0/0; only the low bit is meaningful for .b).
 */
static void
evaluate_ult(nir_const_value *_dst_val,
UNUSED unsigned num_components,
unsigned bit_size,
UNUSED nir_const_value **_src,
UNUSED unsigned execution_mode)
{
switch (bit_size) {
case 1: {




for (unsigned _i = 0; _i < num_components; _i++) {
const uint1_t src0 =
_src[0][_i].b;
const uint1_t src1 =
_src[1][_i].b;

bool1_t dst = src0 < src1;

_dst_val[_i].b = -(int)dst;

}

break;
}
case 8: {




for (unsigned _i = 0; _i < num_components; _i++) {
const uint8_t src0 =
_src[0][_i].u8;
const uint8_t src1 =
_src[1][_i].u8;

bool1_t dst = src0 < src1;

_dst_val[_i].b = -(int)dst;

}

break;
}
case 16: {




for (unsigned _i = 0; _i < num_components; _i++) {
const uint16_t src0 =
_src[0][_i].u16;
const uint16_t src1 =
_src[1][_i].u16;

bool1_t dst = src0 < src1;

_dst_val[_i].b = -(int)dst;

}

break;
}
case 32: {




for (unsigned _i = 0; _i < num_components; _i++) {
const uint32_t src0 =
_src[0][_i].u32;
const uint32_t src1 =
_src[1][_i].u32;

bool1_t dst = src0 < src1;

_dst_val[_i].b = -(int)dst;

}

break;
}
case 64: {




for (unsigned _i = 0; _i < num_components; _i++) {
const uint64_t src0 =
_src[0][_i].u64;
const uint64_t src1 =
_src[1][_i].u64;

bool1_t dst = src0 < src1;

_dst_val[_i].b = -(int)dst;

}

break;
}

default:
unreachable("unknown bit width");
}
}
/* nir_op_ult16: per-component unsigned < comparison for 1/8/16/32/64-bit
 * sources, producing a 16-bit NIR boolean (-(int)dst stores ~0 for true,
 * 0 for false in the i16 destination).
 */
static void
evaluate_ult16(nir_const_value *_dst_val,
UNUSED unsigned num_components,
unsigned bit_size,
UNUSED nir_const_value **_src,
UNUSED unsigned execution_mode)
{
switch (bit_size) {
case 1: {




for (unsigned _i = 0; _i < num_components; _i++) {
const uint1_t src0 =
_src[0][_i].b;
const uint1_t src1 =
_src[1][_i].b;

bool16_t dst = src0 < src1;

_dst_val[_i].i16 = -(int)dst;

}

break;
}
case 8: {




for (unsigned _i = 0; _i < num_components; _i++) {
const uint8_t src0 =
_src[0][_i].u8;
const uint8_t src1 =
_src[1][_i].u8;

bool16_t dst = src0 < src1;

_dst_val[_i].i16 = -(int)dst;

}

break;
}
case 16: {




for (unsigned _i = 0; _i < num_components; _i++) {
const uint16_t src0 =
_src[0][_i].u16;
const uint16_t src1 =
_src[1][_i].u16;

bool16_t dst = src0 < src1;

_dst_val[_i].i16 = -(int)dst;

}

break;
}
case 32: {




for (unsigned _i = 0; _i < num_components; _i++) {
const uint32_t src0 =
_src[0][_i].u32;
const uint32_t src1 =
_src[1][_i].u32;

bool16_t dst = src0 < src1;

_dst_val[_i].i16 = -(int)dst;

}

break;
}
case 64: {




for (unsigned _i = 0; _i < num_components; _i++) {
const uint64_t src0 =
_src[0][_i].u64;
const uint64_t src1 =
_src[1][_i].u64;

bool16_t dst = src0 < src1;

_dst_val[_i].i16 = -(int)dst;

}

break;
}

default:
unreachable("unknown bit width");
}
}
/* nir_op_ult32: per-component unsigned < comparison for 1/8/16/32/64-bit
 * sources, producing a 32-bit NIR boolean (-(int)dst stores ~0 for true,
 * 0 for false in the i32 destination).
 */
static void
evaluate_ult32(nir_const_value *_dst_val,
UNUSED unsigned num_components,
unsigned bit_size,
UNUSED nir_const_value **_src,
UNUSED unsigned execution_mode)
{
switch (bit_size) {
case 1: {




for (unsigned _i = 0; _i < num_components; _i++) {
const uint1_t src0 =
_src[0][_i].b;
const uint1_t src1 =
_src[1][_i].b;

bool32_t dst = src0 < src1;

_dst_val[_i].i32 = -(int)dst;

}

break;
}
case 8: {




for (unsigned _i = 0; _i < num_components; _i++) {
const uint8_t src0 =
_src[0][_i].u8;
const uint8_t src1 =
_src[1][_i].u8;

bool32_t dst = src0 < src1;

_dst_val[_i].i32 = -(int)dst;

}

break;
}
case 16: {




for (unsigned _i = 0; _i < num_components; _i++) {
const uint16_t src0 =
_src[0][_i].u16;
const uint16_t src1 =
_src[1][_i].u16;

bool32_t dst = src0 < src1;

_dst_val[_i].i32 = -(int)dst;

}

break;
}
case 32: {




for (unsigned _i = 0; _i < num_components; _i++) {
const uint32_t src0 =
_src[0][_i].u32;
const uint32_t src1 =
_src[1][_i].u32;

bool32_t dst = src0 < src1;

_dst_val[_i].i32 = -(int)dst;

}

break;
}
case 64: {




for (unsigned _i = 0; _i < num_components; _i++) {
const uint64_t src0 =
_src[0][_i].u64;
const uint64_t src1 =
_src[1][_i].u64;

bool32_t dst = src0 < src1;

_dst_val[_i].i32 = -(int)dst;

}

break;
}

default:
unreachable("unknown bit width");
}
}
/* nir_op_ult8: per-component unsigned < comparison for 1/8/16/32/64-bit
 * sources, producing an 8-bit NIR boolean (-(int)dst stores ~0 for true,
 * 0 for false in the i8 destination).
 */
static void
evaluate_ult8(nir_const_value *_dst_val,
UNUSED unsigned num_components,
unsigned bit_size,
UNUSED nir_const_value **_src,
UNUSED unsigned execution_mode)
{
switch (bit_size) {
case 1: {




for (unsigned _i = 0; _i < num_components; _i++) {
const uint1_t src0 =
_src[0][_i].b;
const uint1_t src1 =
_src[1][_i].b;

bool8_t dst = src0 < src1;

_dst_val[_i].i8 = -(int)dst;

}

break;
}
case 8: {




for (unsigned _i = 0; _i < num_components; _i++) {
const uint8_t src0 =
_src[0][_i].u8;
const uint8_t src1 =
_src[1][_i].u8;

bool8_t dst = src0 < src1;

_dst_val[_i].i8 = -(int)dst;

}

break;
}
case 16: {




for (unsigned _i = 0; _i < num_components; _i++) {
const uint16_t src0 =
_src[0][_i].u16;
const uint16_t src1 =
_src[1][_i].u16;

bool8_t dst = src0 < src1;

_dst_val[_i].i8 = -(int)dst;

}

break;
}
case 32: {




for (unsigned _i = 0; _i < num_components; _i++) {
const uint32_t src0 =
_src[0][_i].u32;
const uint32_t src1 =
_src[1][_i].u32;

bool8_t dst = src0 < src1;

_dst_val[_i].i8 = -(int)dst;

}

break;
}
case 64: {




for (unsigned _i = 0; _i < num_components; _i++) {
const uint64_t src0 =
_src[0][_i].u64;
const uint64_t src1 =
_src[1][_i].u64;

bool8_t dst = src0 < src1;

_dst_val[_i].i8 = -(int)dst;

}

break;
}

default:
unreachable("unknown bit width");
}
}
/* nir_op_umad24: unsigned 24-bit multiply-add, 32-bit only.
 * The << 8 >> 8 pair masks each multiplicand to its low 24 bits before
 * the 32-bit multiply; src2 is added at full 32-bit width.
 */
static void
evaluate_umad24(nir_const_value *_dst_val,
UNUSED unsigned num_components,
UNUSED unsigned bit_size,
UNUSED nir_const_value **_src,
UNUSED unsigned execution_mode)
{




for (unsigned _i = 0; _i < num_components; _i++) {
const uint32_t src0 =
_src[0][_i].u32;
const uint32_t src1 =
_src[1][_i].u32;
const uint32_t src2 =
_src[2][_i].u32;

uint32_t dst = (((uint32_t)src0 << 8) >> 8) * (((uint32_t)src1 << 8) >> 8) + src2;

_dst_val[_i].u32 = dst;

}

}
/* nir_op_umax: per-component unsigned maximum for 1/8/16/32/64-bit sources. */
static void
evaluate_umax(nir_const_value *_dst_val,
UNUSED unsigned num_components,
unsigned bit_size,
UNUSED nir_const_value **_src,
UNUSED unsigned execution_mode)
{
switch (bit_size) {
case 1: {




for (unsigned _i = 0; _i < num_components; _i++) {
const uint1_t src0 =
_src[0][_i].b;
const uint1_t src1 =
_src[1][_i].b;

uint1_t dst = src1 > src0 ? src1 : src0;

/* 1-bit integers get truncated */
_dst_val[_i].b = dst & 1;

}

break;
}
case 8: {




for (unsigned _i = 0; _i < num_components; _i++) {
const uint8_t src0 =
_src[0][_i].u8;
const uint8_t src1 =
_src[1][_i].u8;

uint8_t dst = src1 > src0 ? src1 : src0;

_dst_val[_i].u8 = dst;

}

break;
}
case 16: {




for (unsigned _i = 0; _i < num_components; _i++) {
const uint16_t src0 =
_src[0][_i].u16;
const uint16_t src1 =
_src[1][_i].u16;

uint16_t dst = src1 > src0 ? src1 : src0;

_dst_val[_i].u16 = dst;

}

break;
}
case 32: {




for (unsigned _i = 0; _i < num_components; _i++) {
const uint32_t src0 =
_src[0][_i].u32;
const uint32_t src1 =
_src[1][_i].u32;

uint32_t dst = src1 > src0 ? src1 : src0;

_dst_val[_i].u32 = dst;

}

break;
}
case 64: {




for (unsigned _i = 0; _i < num_components; _i++) {
const uint64_t src0 =
_src[0][_i].u64;
const uint64_t src1 =
_src[1][_i].u64;

uint64_t dst = src1 > src0 ? src1 : src0;

_dst_val[_i].u64 = dst;

}

break;
}

default:
unreachable("unknown bit width");
}
}
/* nir_op_umax_4x8: byte-wise unsigned maximum of two 32-bit values treated
 * as four packed 8-bit lanes; each lane is masked out, MAX2'd, and OR'd
 * back into its position.
 */
static void
evaluate_umax_4x8(nir_const_value *_dst_val,
UNUSED unsigned num_components,
UNUSED unsigned bit_size,
UNUSED nir_const_value **_src,
UNUSED unsigned execution_mode)
{




for (unsigned _i = 0; _i < num_components; _i++) {
const int32_t src0 =
_src[0][_i].i32;
const int32_t src1 =
_src[1][_i].i32;

int32_t dst;


dst = 0;
for (int i = 0; i < 32; i += 8) {
dst |= MAX2((src0 >> i) & 0xff, (src1 >> i) & 0xff) << i;
}


_dst_val[_i].i32 = dst;

}

}
/* nir_op_umin: per-component unsigned minimum for 1/8/16/32/64-bit sources. */
static void
evaluate_umin(nir_const_value *_dst_val,
UNUSED unsigned num_components,
unsigned bit_size,
UNUSED nir_const_value **_src,
UNUSED unsigned execution_mode)
{
switch (bit_size) {
case 1: {




for (unsigned _i = 0; _i < num_components; _i++) {
const uint1_t src0 =
_src[0][_i].b;
const uint1_t src1 =
_src[1][_i].b;

uint1_t dst = src1 > src0 ? src0 : src1;

/* 1-bit integers get truncated */
_dst_val[_i].b = dst & 1;

}

break;
}
case 8: {




for (unsigned _i = 0; _i < num_components; _i++) {
const uint8_t src0 =
_src[0][_i].u8;
const uint8_t src1 =
_src[1][_i].u8;

uint8_t dst = src1 > src0 ? src0 : src1;

_dst_val[_i].u8 = dst;

}

break;
}
case 16: {




for (unsigned _i = 0; _i < num_components; _i++) {
const uint16_t src0 =
_src[0][_i].u16;
const uint16_t src1 =
_src[1][_i].u16;

uint16_t dst = src1 > src0 ? src0 : src1;

_dst_val[_i].u16 = dst;

}

break;
}
case 32: {




for (unsigned _i = 0; _i < num_components; _i++) {
const uint32_t src0 =
_src[0][_i].u32;
const uint32_t src1 =
_src[1][_i].u32;

uint32_t dst = src1 > src0 ? src0 : src1;

_dst_val[_i].u32 = dst;

}

break;
}
case 64: {




for (unsigned _i = 0; _i < num_components; _i++) {
const uint64_t src0 =
_src[0][_i].u64;
const uint64_t src1 =
_src[1][_i].u64;

uint64_t dst = src1 > src0 ? src0 : src1;

_dst_val[_i].u64 = dst;

}

break;
}

default:
unreachable("unknown bit width");
}
}
/* nir_op_umin_4x8: byte-wise unsigned minimum of two 32-bit values treated
 * as four packed 8-bit lanes; each lane is masked out, MIN2'd, and OR'd
 * back into its position.
 */
static void
evaluate_umin_4x8(nir_const_value *_dst_val,
UNUSED unsigned num_components,
UNUSED unsigned bit_size,
UNUSED nir_const_value **_src,
UNUSED unsigned execution_mode)
{




for (unsigned _i = 0; _i < num_components; _i++) {
const int32_t src0 =
_src[0][_i].i32;
const int32_t src1 =
_src[1][_i].i32;

int32_t dst;


dst = 0;
for (int i = 0; i < 32; i += 8) {
dst |= MIN2((src0 >> i) & 0xff, (src1 >> i) & 0xff) << i;
}


_dst_val[_i].i32 = dst;

}

}
/* nir_op_umod: per-component unsigned modulo for 1/8/16/32/64-bit sources.
 * A zero divisor yields 0 rather than undefined behavior, so constant
 * folding never traps.
 */
static void
evaluate_umod(nir_const_value *_dst_val,
UNUSED unsigned num_components,
unsigned bit_size,
UNUSED nir_const_value **_src,
UNUSED unsigned execution_mode)
{
switch (bit_size) {
case 1: {




for (unsigned _i = 0; _i < num_components; _i++) {
const uint1_t src0 =
_src[0][_i].b;
const uint1_t src1 =
_src[1][_i].b;

uint1_t dst = src1 == 0 ? 0 : src0 % src1;

/* 1-bit integers get truncated */
_dst_val[_i].b = dst & 1;

}

break;
}
case 8: {




for (unsigned _i = 0; _i < num_components; _i++) {
const uint8_t src0 =
_src[0][_i].u8;
const uint8_t src1 =
_src[1][_i].u8;

uint8_t dst = src1 == 0 ? 0 : src0 % src1;

_dst_val[_i].u8 = dst;

}

break;
}
case 16: {




for (unsigned _i = 0; _i < num_components; _i++) {
const uint16_t src0 =
_src[0][_i].u16;
const uint16_t src1 =
_src[1][_i].u16;

uint16_t dst = src1 == 0 ? 0 : src0 % src1;

_dst_val[_i].u16 = dst;

}

break;
}
case 32: {




for (unsigned _i = 0; _i < num_components; _i++) {
const uint32_t src0 =
_src[0][_i].u32;
const uint32_t src1 =
_src[1][_i].u32;

uint32_t dst = src1 == 0 ? 0 : src0 % src1;

_dst_val[_i].u32 = dst;

}

break;
}
case 64: {




for (unsigned _i = 0; _i < num_components; _i++) {
const uint64_t src0 =
_src[0][_i].u64;
const uint64_t src1 =
_src[1][_i].u64;

uint64_t dst = src1 == 0 ? 0 : src0 % src1;

_dst_val[_i].u64 = dst;

}

break;
}

default:
unreachable("unknown bit width");
}
}
/* nir_op_umul24: unsigned 24-bit multiply, 32-bit only. The << 8 >> 8 pair
 * masks each operand to its low 24 bits before the 32-bit multiply.
 */
static void
evaluate_umul24(nir_const_value *_dst_val,
UNUSED unsigned num_components,
UNUSED unsigned bit_size,
UNUSED nir_const_value **_src,
UNUSED unsigned execution_mode)
{




for (unsigned _i = 0; _i < num_components; _i++) {
const int32_t src0 =
_src[0][_i].i32;
const int32_t src1 =
_src[1][_i].i32;

int32_t dst = (((uint32_t)src0 << 8) >> 8) * (((uint32_t)src1 << 8) >> 8);

_dst_val[_i].i32 = dst;

}

}
/* nir_op_umul_2x32_64: full 32x32 -> 64 unsigned multiply; both operands
 * are widened to 64 bits before multiplying so no bits are lost.
 */
static void
evaluate_umul_2x32_64(nir_const_value *_dst_val,
UNUSED unsigned num_components,
UNUSED unsigned bit_size,
UNUSED nir_const_value **_src,
UNUSED unsigned execution_mode)
{




for (unsigned _i = 0; _i < num_components; _i++) {
const uint32_t src0 =
_src[0][_i].u32;
const uint32_t src1 =
_src[1][_i].u32;

uint64_t dst = (uint64_t)src0 * (uint64_t)src1;

_dst_val[_i].u64 = dst;

}

}
/* nir_op_umul_32x16: unsigned multiply of a full 32-bit operand by the low
 * 16 bits of the second operand, truncated to 32 bits.
 */
static void
evaluate_umul_32x16(nir_const_value *_dst_val,
UNUSED unsigned num_components,
UNUSED unsigned bit_size,
UNUSED nir_const_value **_src,
UNUSED unsigned execution_mode)
{




for (unsigned _i = 0; _i < num_components; _i++) {
const uint32_t src0 =
_src[0][_i].u32;
const uint32_t src1 =
_src[1][_i].u32;

uint32_t dst = src0 * (uint16_t) src1;

_dst_val[_i].u32 = dst;

}

}
/* nir_op_umul_high: per-component high half of the unsigned product.
 * For sources narrower than 64 bits the product fits in a uint64_t and the
 * high half is a simple shift by bit_size; the bit_size == 64 branch (live
 * only in the case 64 arm, since bit_size is fixed by the enclosing switch)
 * uses ubm_mul_u32arr big-number multiplication to form the 128-bit product
 * and returns its upper 64 bits.
 */
static void
evaluate_umul_high(nir_const_value *_dst_val,
UNUSED unsigned num_components,
unsigned bit_size,
UNUSED nir_const_value **_src,
UNUSED unsigned execution_mode)
{
switch (bit_size) {
case 1: {




for (unsigned _i = 0; _i < num_components; _i++) {
const uint1_t src0 =
_src[0][_i].b;
const uint1_t src1 =
_src[1][_i].b;

uint1_t dst;


if (bit_size == 64) {
/* The casts are kind-of annoying but needed to prevent compiler warnings. */
uint32_t src0_u32[2] = { src0, (uint64_t)src0 >> 32 };
uint32_t src1_u32[2] = { src1, (uint64_t)src1 >> 32 };
uint32_t prod_u32[4];
ubm_mul_u32arr(prod_u32, src0_u32, src1_u32);
dst = (uint64_t)prod_u32[2] | ((uint64_t)prod_u32[3] << 32);
} else {
dst = ((uint64_t)src0 * (uint64_t)src1) >> bit_size;
}


/* 1-bit integers get truncated */
_dst_val[_i].b = dst & 1;

}

break;
}
case 8: {




for (unsigned _i = 0; _i < num_components; _i++) {
const uint8_t src0 =
_src[0][_i].u8;
const uint8_t src1 =
_src[1][_i].u8;

uint8_t dst;


if (bit_size == 64) {
/* The casts are kind-of annoying but needed to prevent compiler warnings. */
uint32_t src0_u32[2] = { src0, (uint64_t)src0 >> 32 };
uint32_t src1_u32[2] = { src1, (uint64_t)src1 >> 32 };
uint32_t prod_u32[4];
ubm_mul_u32arr(prod_u32, src0_u32, src1_u32);
dst = (uint64_t)prod_u32[2] | ((uint64_t)prod_u32[3] << 32);
} else {
dst = ((uint64_t)src0 * (uint64_t)src1) >> bit_size;
}


_dst_val[_i].u8 = dst;

}

break;
}
case 16: {




for (unsigned _i = 0; _i < num_components; _i++) {
const uint16_t src0 =
_src[0][_i].u16;
const uint16_t src1 =
_src[1][_i].u16;

uint16_t dst;


if (bit_size == 64) {
/* The casts are kind-of annoying but needed to prevent compiler warnings. */
uint32_t src0_u32[2] = { src0, (uint64_t)src0 >> 32 };
uint32_t src1_u32[2] = { src1, (uint64_t)src1 >> 32 };
uint32_t prod_u32[4];
ubm_mul_u32arr(prod_u32, src0_u32, src1_u32);
dst = (uint64_t)prod_u32[2] | ((uint64_t)prod_u32[3] << 32);
} else {
dst = ((uint64_t)src0 * (uint64_t)src1) >> bit_size;
}


_dst_val[_i].u16 = dst;

}

break;
}
case 32: {




for (unsigned _i = 0; _i < num_components; _i++) {
const uint32_t src0 =
_src[0][_i].u32;
const uint32_t src1 =
_src[1][_i].u32;

uint32_t dst;


if (bit_size == 64) {
/* The casts are kind-of annoying but needed to prevent compiler warnings. */
uint32_t src0_u32[2] = { src0, (uint64_t)src0 >> 32 };
uint32_t src1_u32[2] = { src1, (uint64_t)src1 >> 32 };
uint32_t prod_u32[4];
ubm_mul_u32arr(prod_u32, src0_u32, src1_u32);
dst = (uint64_t)prod_u32[2] | ((uint64_t)prod_u32[3] << 32);
} else {
dst = ((uint64_t)src0 * (uint64_t)src1) >> bit_size;
}


_dst_val[_i].u32 = dst;

}

break;
}
case 64: {




for (unsigned _i = 0; _i < num_components; _i++) {
const uint64_t src0 =
_src[0][_i].u64;
const uint64_t src1 =
_src[1][_i].u64;

uint64_t dst;


if (bit_size == 64) {
/* The casts are kind-of annoying but needed to prevent compiler warnings. */
uint32_t src0_u32[2] = { src0, (uint64_t)src0 >> 32 };
uint32_t src1_u32[2] = { src1, (uint64_t)src1 >> 32 };
uint32_t prod_u32[4];
ubm_mul_u32arr(prod_u32, src0_u32, src1_u32);
dst = (uint64_t)prod_u32[2] | ((uint64_t)prod_u32[3] << 32);
} else {
dst = ((uint64_t)src0 * (uint64_t)src1) >> bit_size;
}


_dst_val[_i].u64 = dst;

}

break;
}

default:
unreachable("unknown bit width");
}
}
/* nir_op_umul_low: multiply of the low halves of two operands; the mask
 * keeps the low bit_size/2 bits of each source before the multiply.
 * NOTE(review): the mask is built with a plain int shift, `1 << (bit_size/2)`;
 * this op only reads/writes .u32 here so the shift is 16 and well-defined,
 * but a 64-bit variant would need 1ull — confirm against the generator.
 */
static void
evaluate_umul_low(nir_const_value *_dst_val,
UNUSED unsigned num_components,
UNUSED unsigned bit_size,
UNUSED nir_const_value **_src,
UNUSED unsigned execution_mode)
{




for (unsigned _i = 0; _i < num_components; _i++) {
const uint32_t src0 =
_src[0][_i].u32;
const uint32_t src1 =
_src[1][_i].u32;

uint32_t dst;


uint64_t mask = (1 << (bit_size / 2)) - 1;
dst = ((uint64_t)src0 & mask) * ((uint64_t)src1 & mask);


_dst_val[_i].u32 = dst;

}

}
/* nir_op_umul_unorm_4x8: byte-wise unorm multiply of two 32-bit values
 * treated as four packed 8-bit lanes; each pair of lanes is multiplied and
 * renormalized by dividing by 255 before being OR'd back into position.
 */
static void
evaluate_umul_unorm_4x8(nir_const_value *_dst_val,
UNUSED unsigned num_components,
UNUSED unsigned bit_size,
UNUSED nir_const_value **_src,
UNUSED unsigned execution_mode)
{




for (unsigned _i = 0; _i < num_components; _i++) {
const int32_t src0 =
_src[0][_i].i32;
const int32_t src1 =
_src[1][_i].i32;

int32_t dst;


dst = 0;
for (int i = 0; i < 32; i += 8) {
int src0_chan = (src0 >> i) & 0xff;
int src1_chan = (src1 >> i) & 0xff;
dst |= ((src0_chan * src1_chan) / 255) << i;
}


_dst_val[_i].i32 = dst;

}

}
/* nir_op_unpack_32_2x16: split one 32-bit scalar into a 2-component 16-bit
 * vector: x = low half-word, y = high half-word.
 */
static void
evaluate_unpack_32_2x16(nir_const_value *_dst_val,
UNUSED unsigned num_components,
UNUSED unsigned bit_size,
UNUSED nir_const_value **_src,
UNUSED unsigned execution_mode)
{




const struct uint32_vec src0 = {
_src[0][0].u32,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
};

struct uint16_vec dst;

dst.x = src0.x; dst.y = src0.x >> 16;

_dst_val[0].u16 = dst.x;

_dst_val[1].u16 = dst.y;


}
/* nir_op_unpack_32_2x16_split_x: extract the low 16 bits of each 32-bit
 * component (truncating cast to uint16_t).
 */
static void
evaluate_unpack_32_2x16_split_x(nir_const_value *_dst_val,
UNUSED unsigned num_components,
UNUSED unsigned bit_size,
UNUSED nir_const_value **_src,
UNUSED unsigned execution_mode)
{




for (unsigned _i = 0; _i < num_components; _i++) {
const uint32_t src0 =
_src[0][_i].u32;

uint16_t dst = src0;

_dst_val[_i].u16 = dst;

}

}
/* nir_op_unpack_32_2x16_split_y: extract the high 16 bits of each 32-bit
 * component (shift right by 16, then truncate).
 */
static void
evaluate_unpack_32_2x16_split_y(nir_const_value *_dst_val,
UNUSED unsigned num_components,
UNUSED unsigned bit_size,
UNUSED nir_const_value **_src,
UNUSED unsigned execution_mode)
{




for (unsigned _i = 0; _i < num_components; _i++) {
const uint32_t src0 =
_src[0][_i].u32;

uint16_t dst = src0 >> 16;

_dst_val[_i].u16 = dst;

}

}
/* nir_op_unpack_32_4x8: split one 32-bit scalar into a 4-component 8-bit
 * vector: x = bits [7:0], y = [15:8], z = [23:16], w = [31:24].
 */
static void
evaluate_unpack_32_4x8(nir_const_value *_dst_val,
UNUSED unsigned num_components,
UNUSED unsigned bit_size,
UNUSED nir_const_value **_src,
UNUSED unsigned execution_mode)
{




const struct uint32_vec src0 = {
_src[0][0].u32,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
};

struct uint8_vec dst;

dst.x = src0.x; dst.y = src0.x >> 8; dst.z = src0.x >> 16; dst.w = src0.x >> 24;

_dst_val[0].u8 = dst.x;

_dst_val[1].u8 = dst.y;

_dst_val[2].u8 = dst.z;

_dst_val[3].u8 = dst.w;


}
/* nir_op_unpack_64_2x32: split one 64-bit scalar into a 2-component 32-bit
 * vector: x = low word, y = high word.
 */
static void
evaluate_unpack_64_2x32(nir_const_value *_dst_val,
UNUSED unsigned num_components,
UNUSED unsigned bit_size,
UNUSED nir_const_value **_src,
UNUSED unsigned execution_mode)
{




const struct uint64_vec src0 = {
_src[0][0].u64,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
};

struct uint32_vec dst;

dst.x = src0.x; dst.y = src0.x >> 32;

_dst_val[0].u32 = dst.x;

_dst_val[1].u32 = dst.y;


}
/* nir_op_unpack_64_2x32_split_x: extract the low 32 bits of each 64-bit
 * component (truncating cast to uint32_t).
 */
static void
evaluate_unpack_64_2x32_split_x(nir_const_value *_dst_val,
UNUSED unsigned num_components,
UNUSED unsigned bit_size,
UNUSED nir_const_value **_src,
UNUSED unsigned execution_mode)
{




for (unsigned _i = 0; _i < num_components; _i++) {
const uint64_t src0 =
_src[0][_i].u64;

uint32_t dst = src0;

_dst_val[_i].u32 = dst;

}

}
/* nir_op_unpack_64_2x32_split_y: extract the high 32 bits of each 64-bit
 * component (shift right by 32, then truncate).
 */
static void
evaluate_unpack_64_2x32_split_y(nir_const_value *_dst_val,
UNUSED unsigned num_components,
UNUSED unsigned bit_size,
UNUSED nir_const_value **_src,
UNUSED unsigned execution_mode)
{




for (unsigned _i = 0; _i < num_components; _i++) {
const uint64_t src0 =
_src[0][_i].u64;

uint32_t dst = src0 >> 32;

_dst_val[_i].u32 = dst;

}

}
| static void |
| evaluate_unpack_64_4x16(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| UNUSED unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| |
| |
| |
| |
| const struct uint64_vec src0 = { |
| _src[0][0].u64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct uint16_vec dst; |
| |
| dst.x = src0.x; dst.y = src0.x >> 16; dst.z = src0.x >> 32; dst.w = src0.w >> 48; |
| |
| _dst_val[0].u16 = dst.x; |
| |
| _dst_val[1].u16 = dst.y; |
| |
| _dst_val[2].u16 = dst.z; |
| |
| _dst_val[3].u16 = dst.w; |
| |
| |
| } |
| static void |
| evaluate_unpack_half_2x16(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| UNUSED unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| |
| |
| |
| |
| const struct uint32_vec src0 = { |
| _src[0][0].u32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct float32_vec dst; |
| |
| |
| dst.x = unpack_half_1x16((uint16_t)(src0.x & 0xffff)); |
| dst.y = unpack_half_1x16((uint16_t)(src0.x << 16)); |
| |
| |
| _dst_val[0].f32 = dst.x; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[0], 32); |
| } |
| _dst_val[1].f32 = dst.y; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[1], 32); |
| } |
| |
| } |
| static void |
| evaluate_unpack_half_2x16_flush_to_zero(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| UNUSED unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| |
| |
| |
| |
| const struct uint32_vec src0 = { |
| _src[0][0].u32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct float32_vec dst; |
| |
| |
| dst.x = unpack_half_1x16_flush_to_zero((uint16_t)(src0.x & 0xffff)); |
| dst.y = unpack_half_1x16_flush_to_zero((uint16_t)(src0.x << 16)); |
| |
| |
| _dst_val[0].f32 = dst.x; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[0], 32); |
| } |
| _dst_val[1].f32 = dst.y; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[1], 32); |
| } |
| |
| } |
/* nir_op_unpack_half_2x16_split_x: decode the half-precision float in the
 * low 16 bits of each 32-bit component, with optional denorm flush of the
 * 32-bit result per execution_mode.
 */
static void
evaluate_unpack_half_2x16_split_x(nir_const_value *_dst_val,
UNUSED unsigned num_components,
UNUSED unsigned bit_size,
UNUSED nir_const_value **_src,
UNUSED unsigned execution_mode)
{




for (unsigned _i = 0; _i < num_components; _i++) {
const uint32_t src0 =
_src[0][_i].u32;

float32_t dst = unpack_half_1x16((uint16_t)(src0 & 0xffff));

_dst_val[_i].f32 = dst;

if (nir_is_denorm_flush_to_zero(execution_mode, 32)) {
constant_denorm_flush_to_zero(&_dst_val[_i], 32);
}
}

}
/* nir_op_unpack_half_2x16_split_x_flush_to_zero: like split_x but the
 * half-to-float conversion flushes denormal half inputs to zero; the 32-bit
 * result may additionally be flushed per execution_mode.
 */
static void
evaluate_unpack_half_2x16_split_x_flush_to_zero(nir_const_value *_dst_val,
UNUSED unsigned num_components,
UNUSED unsigned bit_size,
UNUSED nir_const_value **_src,
UNUSED unsigned execution_mode)
{




for (unsigned _i = 0; _i < num_components; _i++) {
const uint32_t src0 =
_src[0][_i].u32;

float32_t dst = unpack_half_1x16_flush_to_zero((uint16_t)(src0 & 0xffff));

_dst_val[_i].f32 = dst;

if (nir_is_denorm_flush_to_zero(execution_mode, 32)) {
constant_denorm_flush_to_zero(&_dst_val[_i], 32);
}
}

}
/* nir_op_unpack_half_2x16_split_y: decode the half-precision float in the
 * high 16 bits of each 32-bit component, with optional denorm flush of the
 * 32-bit result per execution_mode.
 */
static void
evaluate_unpack_half_2x16_split_y(nir_const_value *_dst_val,
UNUSED unsigned num_components,
UNUSED unsigned bit_size,
UNUSED nir_const_value **_src,
UNUSED unsigned execution_mode)
{




for (unsigned _i = 0; _i < num_components; _i++) {
const uint32_t src0 =
_src[0][_i].u32;

float32_t dst = unpack_half_1x16((uint16_t)(src0 >> 16));

_dst_val[_i].f32 = dst;

if (nir_is_denorm_flush_to_zero(execution_mode, 32)) {
constant_denorm_flush_to_zero(&_dst_val[_i], 32);
}
}

}
/* nir_op_unpack_half_2x16_split_y_flush_to_zero: like split_y but the
 * half-to-float conversion flushes denormal half inputs to zero; the 32-bit
 * result may additionally be flushed per execution_mode.
 */
static void
evaluate_unpack_half_2x16_split_y_flush_to_zero(nir_const_value *_dst_val,
UNUSED unsigned num_components,
UNUSED unsigned bit_size,
UNUSED nir_const_value **_src,
UNUSED unsigned execution_mode)
{




for (unsigned _i = 0; _i < num_components; _i++) {
const uint32_t src0 =
_src[0][_i].u32;

float32_t dst = unpack_half_1x16_flush_to_zero((uint16_t)(src0 >> 16));

_dst_val[_i].f32 = dst;

if (nir_is_denorm_flush_to_zero(execution_mode, 32)) {
constant_denorm_flush_to_zero(&_dst_val[_i], 32);
}
}

}
| static void |
| evaluate_unpack_snorm_2x16(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| UNUSED unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| |
| |
| |
| |
| const struct uint32_vec src0 = { |
| _src[0][0].u32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct float32_vec dst; |
| |
| |
| dst.x = unpack_snorm_1x16((uint16_t)(src0.x & 0xffff)); |
| dst.y = unpack_snorm_1x16((uint16_t)(src0.x << 16)); |
| |
| |
| _dst_val[0].f32 = dst.x; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[0], 32); |
| } |
| _dst_val[1].f32 = dst.y; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[1], 32); |
| } |
| |
| } |
| static void |
| evaluate_unpack_snorm_4x8(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| UNUSED unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| |
| |
| |
| |
| const struct uint32_vec src0 = { |
| _src[0][0].u32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct float32_vec dst; |
| |
| |
| dst.x = unpack_snorm_1x8((uint8_t)(src0.x & 0xff)); |
| dst.y = unpack_snorm_1x8((uint8_t)((src0.x >> 8) & 0xff)); |
| dst.z = unpack_snorm_1x8((uint8_t)((src0.x >> 16) & 0xff)); |
| dst.w = unpack_snorm_1x8((uint8_t)(src0.x >> 24)); |
| |
| |
| _dst_val[0].f32 = dst.x; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[0], 32); |
| } |
| _dst_val[1].f32 = dst.y; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[1], 32); |
| } |
| _dst_val[2].f32 = dst.z; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[2], 32); |
| } |
| _dst_val[3].f32 = dst.w; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[3], 32); |
| } |
| |
| } |
| static void |
| evaluate_unpack_unorm_2x16(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| UNUSED unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| |
| |
| |
| |
| const struct uint32_vec src0 = { |
| _src[0][0].u32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct float32_vec dst; |
| |
| |
| dst.x = unpack_unorm_1x16((uint16_t)(src0.x & 0xffff)); |
| dst.y = unpack_unorm_1x16((uint16_t)(src0.x << 16)); |
| |
| |
| _dst_val[0].f32 = dst.x; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[0], 32); |
| } |
| _dst_val[1].f32 = dst.y; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[1], 32); |
| } |
| |
| } |
| static void |
| evaluate_unpack_unorm_4x8(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| UNUSED unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| |
| |
| |
| |
| const struct uint32_vec src0 = { |
| _src[0][0].u32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct float32_vec dst; |
| |
| |
| dst.x = unpack_unorm_1x8((uint8_t)(src0.x & 0xff)); |
| dst.y = unpack_unorm_1x8((uint8_t)((src0.x >> 8) & 0xff)); |
| dst.z = unpack_unorm_1x8((uint8_t)((src0.x >> 16) & 0xff)); |
| dst.w = unpack_unorm_1x8((uint8_t)(src0.x >> 24)); |
| |
| |
| _dst_val[0].f32 = dst.x; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[0], 32); |
| } |
| _dst_val[1].f32 = dst.y; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[1], 32); |
| } |
| _dst_val[2].f32 = dst.z; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[2], 32); |
| } |
| _dst_val[3].f32 = dst.w; |
| |
| if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { |
| constant_denorm_flush_to_zero(&_dst_val[3], 32); |
| } |
| |
| } |
| static void |
| evaluate_urhadd(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint1_t src0 = |
| _src[0][_i].b; |
| const uint1_t src1 = |
| _src[1][_i].b; |
| |
| uint1_t dst = (src0 | src1) + ((src0 ^ src1) >> 1); |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[_i].b = dst & 1; |
| |
| } |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint8_t src0 = |
| _src[0][_i].u8; |
| const uint8_t src1 = |
| _src[1][_i].u8; |
| |
| uint8_t dst = (src0 | src1) + ((src0 ^ src1) >> 1); |
| |
| _dst_val[_i].u8 = dst; |
| |
| } |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint16_t src0 = |
| _src[0][_i].u16; |
| const uint16_t src1 = |
| _src[1][_i].u16; |
| |
| uint16_t dst = (src0 | src1) + ((src0 ^ src1) >> 1); |
| |
| _dst_val[_i].u16 = dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint32_t src0 = |
| _src[0][_i].u32; |
| const uint32_t src1 = |
| _src[1][_i].u32; |
| |
| uint32_t dst = (src0 | src1) + ((src0 ^ src1) >> 1); |
| |
| _dst_val[_i].u32 = dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint64_t src0 = |
| _src[0][_i].u64; |
| const uint64_t src1 = |
| _src[1][_i].u64; |
| |
| uint64_t dst = (src0 | src1) + ((src0 ^ src1) >> 1); |
| |
| _dst_val[_i].u64 = dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_urol(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint1_t src0 = |
| _src[0][_i].b; |
| const uint32_t src1 = |
| _src[1][_i].u32; |
| |
| uint1_t dst; |
| |
| |
| uint32_t rotate_mask = sizeof(src0) * 8 - 1; |
| dst = (src0 << (src1 & rotate_mask)) | |
| (src0 >> (-src1 & rotate_mask)); |
| |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[_i].b = dst & 1; |
| |
| } |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint8_t src0 = |
| _src[0][_i].u8; |
| const uint32_t src1 = |
| _src[1][_i].u32; |
| |
| uint8_t dst; |
| |
| |
| uint32_t rotate_mask = sizeof(src0) * 8 - 1; |
| dst = (src0 << (src1 & rotate_mask)) | |
| (src0 >> (-src1 & rotate_mask)); |
| |
| |
| _dst_val[_i].u8 = dst; |
| |
| } |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint16_t src0 = |
| _src[0][_i].u16; |
| const uint32_t src1 = |
| _src[1][_i].u32; |
| |
| uint16_t dst; |
| |
| |
| uint32_t rotate_mask = sizeof(src0) * 8 - 1; |
| dst = (src0 << (src1 & rotate_mask)) | |
| (src0 >> (-src1 & rotate_mask)); |
| |
| |
| _dst_val[_i].u16 = dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint32_t src0 = |
| _src[0][_i].u32; |
| const uint32_t src1 = |
| _src[1][_i].u32; |
| |
| uint32_t dst; |
| |
| |
| uint32_t rotate_mask = sizeof(src0) * 8 - 1; |
| dst = (src0 << (src1 & rotate_mask)) | |
| (src0 >> (-src1 & rotate_mask)); |
| |
| |
| _dst_val[_i].u32 = dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint64_t src0 = |
| _src[0][_i].u64; |
| const uint32_t src1 = |
| _src[1][_i].u32; |
| |
| uint64_t dst; |
| |
| |
| uint32_t rotate_mask = sizeof(src0) * 8 - 1; |
| dst = (src0 << (src1 & rotate_mask)) | |
| (src0 >> (-src1 & rotate_mask)); |
| |
| |
| _dst_val[_i].u64 = dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_uror(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint1_t src0 = |
| _src[0][_i].b; |
| const uint32_t src1 = |
| _src[1][_i].u32; |
| |
| uint1_t dst; |
| |
| |
| uint32_t rotate_mask = sizeof(src0) * 8 - 1; |
| dst = (src0 >> (src1 & rotate_mask)) | |
| (src0 << (-src1 & rotate_mask)); |
| |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[_i].b = dst & 1; |
| |
| } |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint8_t src0 = |
| _src[0][_i].u8; |
| const uint32_t src1 = |
| _src[1][_i].u32; |
| |
| uint8_t dst; |
| |
| |
| uint32_t rotate_mask = sizeof(src0) * 8 - 1; |
| dst = (src0 >> (src1 & rotate_mask)) | |
| (src0 << (-src1 & rotate_mask)); |
| |
| |
| _dst_val[_i].u8 = dst; |
| |
| } |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint16_t src0 = |
| _src[0][_i].u16; |
| const uint32_t src1 = |
| _src[1][_i].u32; |
| |
| uint16_t dst; |
| |
| |
| uint32_t rotate_mask = sizeof(src0) * 8 - 1; |
| dst = (src0 >> (src1 & rotate_mask)) | |
| (src0 << (-src1 & rotate_mask)); |
| |
| |
| _dst_val[_i].u16 = dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint32_t src0 = |
| _src[0][_i].u32; |
| const uint32_t src1 = |
| _src[1][_i].u32; |
| |
| uint32_t dst; |
| |
| |
| uint32_t rotate_mask = sizeof(src0) * 8 - 1; |
| dst = (src0 >> (src1 & rotate_mask)) | |
| (src0 << (-src1 & rotate_mask)); |
| |
| |
| _dst_val[_i].u32 = dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint64_t src0 = |
| _src[0][_i].u64; |
| const uint32_t src1 = |
| _src[1][_i].u32; |
| |
| uint64_t dst; |
| |
| |
| uint32_t rotate_mask = sizeof(src0) * 8 - 1; |
| dst = (src0 >> (src1 & rotate_mask)) | |
| (src0 << (-src1 & rotate_mask)); |
| |
| |
| _dst_val[_i].u64 = dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_usadd_4x8(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| UNUSED unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int32_t src0 = |
| _src[0][_i].i32; |
| const int32_t src1 = |
| _src[1][_i].i32; |
| |
| int32_t dst; |
| |
| |
| dst = 0; |
| for (int i = 0; i < 32; i += 8) { |
| dst |= MIN2(((src0 >> i) & 0xff) + ((src1 >> i) & 0xff), 0xff) << i; |
| } |
| |
| |
| _dst_val[_i].i32 = dst; |
| |
| } |
| |
| } |
| static void |
| evaluate_ushr(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint1_t src0 = |
| _src[0][_i].b; |
| const uint32_t src1 = |
| _src[1][_i].u32; |
| |
| uint1_t dst = src0 >> (src1 & (sizeof(src0) * 8 - 1)); |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[_i].b = dst & 1; |
| |
| } |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint8_t src0 = |
| _src[0][_i].u8; |
| const uint32_t src1 = |
| _src[1][_i].u32; |
| |
| uint8_t dst = src0 >> (src1 & (sizeof(src0) * 8 - 1)); |
| |
| _dst_val[_i].u8 = dst; |
| |
| } |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint16_t src0 = |
| _src[0][_i].u16; |
| const uint32_t src1 = |
| _src[1][_i].u32; |
| |
| uint16_t dst = src0 >> (src1 & (sizeof(src0) * 8 - 1)); |
| |
| _dst_val[_i].u16 = dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint32_t src0 = |
| _src[0][_i].u32; |
| const uint32_t src1 = |
| _src[1][_i].u32; |
| |
| uint32_t dst = src0 >> (src1 & (sizeof(src0) * 8 - 1)); |
| |
| _dst_val[_i].u32 = dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint64_t src0 = |
| _src[0][_i].u64; |
| const uint32_t src1 = |
| _src[1][_i].u32; |
| |
| uint64_t dst = src0 >> (src1 & (sizeof(src0) * 8 - 1)); |
| |
| _dst_val[_i].u64 = dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_ussub_4x8(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| UNUSED unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const int32_t src0 = |
| _src[0][_i].i32; |
| const int32_t src1 = |
| _src[1][_i].i32; |
| |
| int32_t dst; |
| |
| |
| dst = 0; |
| for (int i = 0; i < 32; i += 8) { |
| int src0_chan = (src0 >> i) & 0xff; |
| int src1_chan = (src1 >> i) & 0xff; |
| if (src0_chan > src1_chan) |
| dst |= (src0_chan - src1_chan) << i; |
| } |
| |
| |
| _dst_val[_i].i32 = dst; |
| |
| } |
| |
| } |
| static void |
| evaluate_usub_borrow(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint1_t src0 = |
| _src[0][_i].b; |
| const uint1_t src1 = |
| _src[1][_i].b; |
| |
| uint1_t dst = src0 < src1; |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[_i].b = dst & 1; |
| |
| } |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint8_t src0 = |
| _src[0][_i].u8; |
| const uint8_t src1 = |
| _src[1][_i].u8; |
| |
| uint8_t dst = src0 < src1; |
| |
| _dst_val[_i].u8 = dst; |
| |
| } |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint16_t src0 = |
| _src[0][_i].u16; |
| const uint16_t src1 = |
| _src[1][_i].u16; |
| |
| uint16_t dst = src0 < src1; |
| |
| _dst_val[_i].u16 = dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint32_t src0 = |
| _src[0][_i].u32; |
| const uint32_t src1 = |
| _src[1][_i].u32; |
| |
| uint32_t dst = src0 < src1; |
| |
| _dst_val[_i].u32 = dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint64_t src0 = |
| _src[0][_i].u64; |
| const uint64_t src1 = |
| _src[1][_i].u64; |
| |
| uint64_t dst = src0 < src1; |
| |
| _dst_val[_i].u64 = dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_usub_sat(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint1_t src0 = |
| _src[0][_i].b; |
| const uint1_t src1 = |
| _src[1][_i].b; |
| |
| uint1_t dst = src0 < src1 ? 0 : src0 - src1; |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[_i].b = dst & 1; |
| |
| } |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint8_t src0 = |
| _src[0][_i].u8; |
| const uint8_t src1 = |
| _src[1][_i].u8; |
| |
| uint8_t dst = src0 < src1 ? 0 : src0 - src1; |
| |
| _dst_val[_i].u8 = dst; |
| |
| } |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint16_t src0 = |
| _src[0][_i].u16; |
| const uint16_t src1 = |
| _src[1][_i].u16; |
| |
| uint16_t dst = src0 < src1 ? 0 : src0 - src1; |
| |
| _dst_val[_i].u16 = dst; |
| |
| } |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint32_t src0 = |
| _src[0][_i].u32; |
| const uint32_t src1 = |
| _src[1][_i].u32; |
| |
| uint32_t dst = src0 < src1 ? 0 : src0 - src1; |
| |
| _dst_val[_i].u32 = dst; |
| |
| } |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| for (unsigned _i = 0; _i < num_components; _i++) { |
| const uint64_t src0 = |
| _src[0][_i].u64; |
| const uint64_t src1 = |
| _src[1][_i].u64; |
| |
| uint64_t dst = src0 < src1 ? 0 : src0 - src1; |
| |
| _dst_val[_i].u64 = dst; |
| |
| } |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_vec16(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| const struct uint1_vec src0 = { |
| _src[0][0].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint1_vec src1 = { |
| _src[1][0].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint1_vec src2 = { |
| _src[2][0].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint1_vec src3 = { |
| _src[3][0].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint1_vec src4 = { |
| _src[4][0].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint1_vec src5 = { |
| _src[5][0].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint1_vec src6 = { |
| _src[6][0].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint1_vec src7 = { |
| _src[7][0].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint1_vec src8 = { |
| _src[8][0].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint1_vec src9 = { |
| _src[9][0].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint1_vec src10 = { |
| _src[10][0].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint1_vec src11 = { |
| _src[11][0].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint1_vec src12 = { |
| _src[12][0].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint1_vec src13 = { |
| _src[13][0].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint1_vec src14 = { |
| _src[14][0].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint1_vec src15 = { |
| _src[15][0].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct uint1_vec dst; |
| |
| |
| dst.x = src0.x; |
| dst.y = src1.x; |
| dst.z = src2.x; |
| dst.w = src3.x; |
| dst.e = src4.x; |
| dst.f = src5.x; |
| dst.g = src6.x; |
| dst.h = src7.x; |
| dst.i = src8.x; |
| dst.j = src9.x; |
| dst.k = src10.x; |
| dst.l = src11.x; |
| dst.m = src12.x; |
| dst.n = src13.x; |
| dst.o = src14.x; |
| dst.p = src15.x; |
| |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[0].b = dst.x & 1; |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[1].b = dst.y & 1; |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[2].b = dst.z & 1; |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[3].b = dst.w & 1; |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[4].b = dst.e & 1; |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[5].b = dst.f & 1; |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[6].b = dst.g & 1; |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[7].b = dst.h & 1; |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[8].b = dst.i & 1; |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[9].b = dst.j & 1; |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[10].b = dst.k & 1; |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[11].b = dst.l & 1; |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[12].b = dst.m & 1; |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[13].b = dst.n & 1; |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[14].b = dst.o & 1; |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[15].b = dst.p & 1; |
| |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| const struct uint8_vec src0 = { |
| _src[0][0].u8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint8_vec src1 = { |
| _src[1][0].u8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint8_vec src2 = { |
| _src[2][0].u8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint8_vec src3 = { |
| _src[3][0].u8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint8_vec src4 = { |
| _src[4][0].u8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint8_vec src5 = { |
| _src[5][0].u8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint8_vec src6 = { |
| _src[6][0].u8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint8_vec src7 = { |
| _src[7][0].u8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint8_vec src8 = { |
| _src[8][0].u8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint8_vec src9 = { |
| _src[9][0].u8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint8_vec src10 = { |
| _src[10][0].u8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint8_vec src11 = { |
| _src[11][0].u8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint8_vec src12 = { |
| _src[12][0].u8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint8_vec src13 = { |
| _src[13][0].u8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint8_vec src14 = { |
| _src[14][0].u8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint8_vec src15 = { |
| _src[15][0].u8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct uint8_vec dst; |
| |
| |
| dst.x = src0.x; |
| dst.y = src1.x; |
| dst.z = src2.x; |
| dst.w = src3.x; |
| dst.e = src4.x; |
| dst.f = src5.x; |
| dst.g = src6.x; |
| dst.h = src7.x; |
| dst.i = src8.x; |
| dst.j = src9.x; |
| dst.k = src10.x; |
| dst.l = src11.x; |
| dst.m = src12.x; |
| dst.n = src13.x; |
| dst.o = src14.x; |
| dst.p = src15.x; |
| |
| |
| _dst_val[0].u8 = dst.x; |
| |
| _dst_val[1].u8 = dst.y; |
| |
| _dst_val[2].u8 = dst.z; |
| |
| _dst_val[3].u8 = dst.w; |
| |
| _dst_val[4].u8 = dst.e; |
| |
| _dst_val[5].u8 = dst.f; |
| |
| _dst_val[6].u8 = dst.g; |
| |
| _dst_val[7].u8 = dst.h; |
| |
| _dst_val[8].u8 = dst.i; |
| |
| _dst_val[9].u8 = dst.j; |
| |
| _dst_val[10].u8 = dst.k; |
| |
| _dst_val[11].u8 = dst.l; |
| |
| _dst_val[12].u8 = dst.m; |
| |
| _dst_val[13].u8 = dst.n; |
| |
| _dst_val[14].u8 = dst.o; |
| |
| _dst_val[15].u8 = dst.p; |
| |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| const struct uint16_vec src0 = { |
| _src[0][0].u16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint16_vec src1 = { |
| _src[1][0].u16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint16_vec src2 = { |
| _src[2][0].u16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint16_vec src3 = { |
| _src[3][0].u16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint16_vec src4 = { |
| _src[4][0].u16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint16_vec src5 = { |
| _src[5][0].u16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint16_vec src6 = { |
| _src[6][0].u16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint16_vec src7 = { |
| _src[7][0].u16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint16_vec src8 = { |
| _src[8][0].u16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint16_vec src9 = { |
| _src[9][0].u16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint16_vec src10 = { |
| _src[10][0].u16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint16_vec src11 = { |
| _src[11][0].u16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint16_vec src12 = { |
| _src[12][0].u16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint16_vec src13 = { |
| _src[13][0].u16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint16_vec src14 = { |
| _src[14][0].u16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint16_vec src15 = { |
| _src[15][0].u16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct uint16_vec dst; |
| |
| |
| dst.x = src0.x; |
| dst.y = src1.x; |
| dst.z = src2.x; |
| dst.w = src3.x; |
| dst.e = src4.x; |
| dst.f = src5.x; |
| dst.g = src6.x; |
| dst.h = src7.x; |
| dst.i = src8.x; |
| dst.j = src9.x; |
| dst.k = src10.x; |
| dst.l = src11.x; |
| dst.m = src12.x; |
| dst.n = src13.x; |
| dst.o = src14.x; |
| dst.p = src15.x; |
| |
| |
| _dst_val[0].u16 = dst.x; |
| |
| _dst_val[1].u16 = dst.y; |
| |
| _dst_val[2].u16 = dst.z; |
| |
| _dst_val[3].u16 = dst.w; |
| |
| _dst_val[4].u16 = dst.e; |
| |
| _dst_val[5].u16 = dst.f; |
| |
| _dst_val[6].u16 = dst.g; |
| |
| _dst_val[7].u16 = dst.h; |
| |
| _dst_val[8].u16 = dst.i; |
| |
| _dst_val[9].u16 = dst.j; |
| |
| _dst_val[10].u16 = dst.k; |
| |
| _dst_val[11].u16 = dst.l; |
| |
| _dst_val[12].u16 = dst.m; |
| |
| _dst_val[13].u16 = dst.n; |
| |
| _dst_val[14].u16 = dst.o; |
| |
| _dst_val[15].u16 = dst.p; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct uint32_vec src0 = { |
| _src[0][0].u32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint32_vec src1 = { |
| _src[1][0].u32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint32_vec src2 = { |
| _src[2][0].u32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint32_vec src3 = { |
| _src[3][0].u32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint32_vec src4 = { |
| _src[4][0].u32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint32_vec src5 = { |
| _src[5][0].u32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint32_vec src6 = { |
| _src[6][0].u32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint32_vec src7 = { |
| _src[7][0].u32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint32_vec src8 = { |
| _src[8][0].u32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint32_vec src9 = { |
| _src[9][0].u32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint32_vec src10 = { |
| _src[10][0].u32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint32_vec src11 = { |
| _src[11][0].u32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint32_vec src12 = { |
| _src[12][0].u32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint32_vec src13 = { |
| _src[13][0].u32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint32_vec src14 = { |
| _src[14][0].u32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint32_vec src15 = { |
| _src[15][0].u32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct uint32_vec dst; |
| |
| |
| dst.x = src0.x; |
| dst.y = src1.x; |
| dst.z = src2.x; |
| dst.w = src3.x; |
| dst.e = src4.x; |
| dst.f = src5.x; |
| dst.g = src6.x; |
| dst.h = src7.x; |
| dst.i = src8.x; |
| dst.j = src9.x; |
| dst.k = src10.x; |
| dst.l = src11.x; |
| dst.m = src12.x; |
| dst.n = src13.x; |
| dst.o = src14.x; |
| dst.p = src15.x; |
| |
| |
| _dst_val[0].u32 = dst.x; |
| |
| _dst_val[1].u32 = dst.y; |
| |
| _dst_val[2].u32 = dst.z; |
| |
| _dst_val[3].u32 = dst.w; |
| |
| _dst_val[4].u32 = dst.e; |
| |
| _dst_val[5].u32 = dst.f; |
| |
| _dst_val[6].u32 = dst.g; |
| |
| _dst_val[7].u32 = dst.h; |
| |
| _dst_val[8].u32 = dst.i; |
| |
| _dst_val[9].u32 = dst.j; |
| |
| _dst_val[10].u32 = dst.k; |
| |
| _dst_val[11].u32 = dst.l; |
| |
| _dst_val[12].u32 = dst.m; |
| |
| _dst_val[13].u32 = dst.n; |
| |
| _dst_val[14].u32 = dst.o; |
| |
| _dst_val[15].u32 = dst.p; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct uint64_vec src0 = { |
| _src[0][0].u64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint64_vec src1 = { |
| _src[1][0].u64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint64_vec src2 = { |
| _src[2][0].u64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint64_vec src3 = { |
| _src[3][0].u64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint64_vec src4 = { |
| _src[4][0].u64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint64_vec src5 = { |
| _src[5][0].u64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint64_vec src6 = { |
| _src[6][0].u64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint64_vec src7 = { |
| _src[7][0].u64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint64_vec src8 = { |
| _src[8][0].u64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint64_vec src9 = { |
| _src[9][0].u64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint64_vec src10 = { |
| _src[10][0].u64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint64_vec src11 = { |
| _src[11][0].u64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint64_vec src12 = { |
| _src[12][0].u64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint64_vec src13 = { |
| _src[13][0].u64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint64_vec src14 = { |
| _src[14][0].u64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint64_vec src15 = { |
| _src[15][0].u64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct uint64_vec dst; |
| |
| |
| dst.x = src0.x; |
| dst.y = src1.x; |
| dst.z = src2.x; |
| dst.w = src3.x; |
| dst.e = src4.x; |
| dst.f = src5.x; |
| dst.g = src6.x; |
| dst.h = src7.x; |
| dst.i = src8.x; |
| dst.j = src9.x; |
| dst.k = src10.x; |
| dst.l = src11.x; |
| dst.m = src12.x; |
| dst.n = src13.x; |
| dst.o = src14.x; |
| dst.p = src15.x; |
| |
| |
| _dst_val[0].u64 = dst.x; |
| |
| _dst_val[1].u64 = dst.y; |
| |
| _dst_val[2].u64 = dst.z; |
| |
| _dst_val[3].u64 = dst.w; |
| |
| _dst_val[4].u64 = dst.e; |
| |
| _dst_val[5].u64 = dst.f; |
| |
| _dst_val[6].u64 = dst.g; |
| |
| _dst_val[7].u64 = dst.h; |
| |
| _dst_val[8].u64 = dst.i; |
| |
| _dst_val[9].u64 = dst.j; |
| |
| _dst_val[10].u64 = dst.k; |
| |
| _dst_val[11].u64 = dst.l; |
| |
| _dst_val[12].u64 = dst.m; |
| |
| _dst_val[13].u64 = dst.n; |
| |
| _dst_val[14].u64 = dst.o; |
| |
| _dst_val[15].u64 = dst.p; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_vec2(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| const struct uint1_vec src0 = { |
| _src[0][0].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint1_vec src1 = { |
| _src[1][0].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct uint1_vec dst; |
| |
| |
| dst.x = src0.x; |
| dst.y = src1.x; |
| |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[0].b = dst.x & 1; |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[1].b = dst.y & 1; |
| |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| const struct uint8_vec src0 = { |
| _src[0][0].u8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint8_vec src1 = { |
| _src[1][0].u8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct uint8_vec dst; |
| |
| |
| dst.x = src0.x; |
| dst.y = src1.x; |
| |
| |
| _dst_val[0].u8 = dst.x; |
| |
| _dst_val[1].u8 = dst.y; |
| |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| const struct uint16_vec src0 = { |
| _src[0][0].u16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint16_vec src1 = { |
| _src[1][0].u16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct uint16_vec dst; |
| |
| |
| dst.x = src0.x; |
| dst.y = src1.x; |
| |
| |
| _dst_val[0].u16 = dst.x; |
| |
| _dst_val[1].u16 = dst.y; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct uint32_vec src0 = { |
| _src[0][0].u32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint32_vec src1 = { |
| _src[1][0].u32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct uint32_vec dst; |
| |
| |
| dst.x = src0.x; |
| dst.y = src1.x; |
| |
| |
| _dst_val[0].u32 = dst.x; |
| |
| _dst_val[1].u32 = dst.y; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct uint64_vec src0 = { |
| _src[0][0].u64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint64_vec src1 = { |
| _src[1][0].u64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct uint64_vec dst; |
| |
| |
| dst.x = src0.x; |
| dst.y = src1.x; |
| |
| |
| _dst_val[0].u64 = dst.x; |
| |
| _dst_val[1].u64 = dst.y; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_vec3(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| const struct uint1_vec src0 = { |
| _src[0][0].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint1_vec src1 = { |
| _src[1][0].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint1_vec src2 = { |
| _src[2][0].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct uint1_vec dst; |
| |
| |
| dst.x = src0.x; |
| dst.y = src1.x; |
| dst.z = src2.x; |
| |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[0].b = dst.x & 1; |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[1].b = dst.y & 1; |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[2].b = dst.z & 1; |
| |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| const struct uint8_vec src0 = { |
| _src[0][0].u8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint8_vec src1 = { |
| _src[1][0].u8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint8_vec src2 = { |
| _src[2][0].u8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct uint8_vec dst; |
| |
| |
| dst.x = src0.x; |
| dst.y = src1.x; |
| dst.z = src2.x; |
| |
| |
| _dst_val[0].u8 = dst.x; |
| |
| _dst_val[1].u8 = dst.y; |
| |
| _dst_val[2].u8 = dst.z; |
| |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| const struct uint16_vec src0 = { |
| _src[0][0].u16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint16_vec src1 = { |
| _src[1][0].u16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint16_vec src2 = { |
| _src[2][0].u16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct uint16_vec dst; |
| |
| |
| dst.x = src0.x; |
| dst.y = src1.x; |
| dst.z = src2.x; |
| |
| |
| _dst_val[0].u16 = dst.x; |
| |
| _dst_val[1].u16 = dst.y; |
| |
| _dst_val[2].u16 = dst.z; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct uint32_vec src0 = { |
| _src[0][0].u32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint32_vec src1 = { |
| _src[1][0].u32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint32_vec src2 = { |
| _src[2][0].u32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct uint32_vec dst; |
| |
| |
| dst.x = src0.x; |
| dst.y = src1.x; |
| dst.z = src2.x; |
| |
| |
| _dst_val[0].u32 = dst.x; |
| |
| _dst_val[1].u32 = dst.y; |
| |
| _dst_val[2].u32 = dst.z; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct uint64_vec src0 = { |
| _src[0][0].u64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint64_vec src1 = { |
| _src[1][0].u64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint64_vec src2 = { |
| _src[2][0].u64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct uint64_vec dst; |
| |
| |
| dst.x = src0.x; |
| dst.y = src1.x; |
| dst.z = src2.x; |
| |
| |
| _dst_val[0].u64 = dst.x; |
| |
| _dst_val[1].u64 = dst.y; |
| |
| _dst_val[2].u64 = dst.z; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_vec4(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| const struct uint1_vec src0 = { |
| _src[0][0].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint1_vec src1 = { |
| _src[1][0].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint1_vec src2 = { |
| _src[2][0].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint1_vec src3 = { |
| _src[3][0].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct uint1_vec dst; |
| |
| |
| dst.x = src0.x; |
| dst.y = src1.x; |
| dst.z = src2.x; |
| dst.w = src3.x; |
| |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[0].b = dst.x & 1; |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[1].b = dst.y & 1; |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[2].b = dst.z & 1; |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[3].b = dst.w & 1; |
| |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| const struct uint8_vec src0 = { |
| _src[0][0].u8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint8_vec src1 = { |
| _src[1][0].u8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint8_vec src2 = { |
| _src[2][0].u8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint8_vec src3 = { |
| _src[3][0].u8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct uint8_vec dst; |
| |
| |
| dst.x = src0.x; |
| dst.y = src1.x; |
| dst.z = src2.x; |
| dst.w = src3.x; |
| |
| |
| _dst_val[0].u8 = dst.x; |
| |
| _dst_val[1].u8 = dst.y; |
| |
| _dst_val[2].u8 = dst.z; |
| |
| _dst_val[3].u8 = dst.w; |
| |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| const struct uint16_vec src0 = { |
| _src[0][0].u16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint16_vec src1 = { |
| _src[1][0].u16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint16_vec src2 = { |
| _src[2][0].u16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint16_vec src3 = { |
| _src[3][0].u16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct uint16_vec dst; |
| |
| |
| dst.x = src0.x; |
| dst.y = src1.x; |
| dst.z = src2.x; |
| dst.w = src3.x; |
| |
| |
| _dst_val[0].u16 = dst.x; |
| |
| _dst_val[1].u16 = dst.y; |
| |
| _dst_val[2].u16 = dst.z; |
| |
| _dst_val[3].u16 = dst.w; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct uint32_vec src0 = { |
| _src[0][0].u32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint32_vec src1 = { |
| _src[1][0].u32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint32_vec src2 = { |
| _src[2][0].u32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint32_vec src3 = { |
| _src[3][0].u32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct uint32_vec dst; |
| |
| |
| dst.x = src0.x; |
| dst.y = src1.x; |
| dst.z = src2.x; |
| dst.w = src3.x; |
| |
| |
| _dst_val[0].u32 = dst.x; |
| |
| _dst_val[1].u32 = dst.y; |
| |
| _dst_val[2].u32 = dst.z; |
| |
| _dst_val[3].u32 = dst.w; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct uint64_vec src0 = { |
| _src[0][0].u64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint64_vec src1 = { |
| _src[1][0].u64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint64_vec src2 = { |
| _src[2][0].u64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint64_vec src3 = { |
| _src[3][0].u64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct uint64_vec dst; |
| |
| |
| dst.x = src0.x; |
| dst.y = src1.x; |
| dst.z = src2.x; |
| dst.w = src3.x; |
| |
| |
| _dst_val[0].u64 = dst.x; |
| |
| _dst_val[1].u64 = dst.y; |
| |
| _dst_val[2].u64 = dst.z; |
| |
| _dst_val[3].u64 = dst.w; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| static void |
| evaluate_vec8(nir_const_value *_dst_val, |
| UNUSED unsigned num_components, |
| unsigned bit_size, |
| UNUSED nir_const_value **_src, |
| UNUSED unsigned execution_mode) |
| { |
| switch (bit_size) { |
| case 1: { |
| |
| |
| |
| |
| const struct uint1_vec src0 = { |
| _src[0][0].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint1_vec src1 = { |
| _src[1][0].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint1_vec src2 = { |
| _src[2][0].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint1_vec src3 = { |
| _src[3][0].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint1_vec src4 = { |
| _src[4][0].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint1_vec src5 = { |
| _src[5][0].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint1_vec src6 = { |
| _src[6][0].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint1_vec src7 = { |
| _src[7][0].b, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct uint1_vec dst; |
| |
| |
| dst.x = src0.x; |
| dst.y = src1.x; |
| dst.z = src2.x; |
| dst.w = src3.x; |
| dst.e = src4.x; |
| dst.f = src5.x; |
| dst.g = src6.x; |
| dst.h = src7.x; |
| |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[0].b = dst.x & 1; |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[1].b = dst.y & 1; |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[2].b = dst.z & 1; |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[3].b = dst.w & 1; |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[4].b = dst.e & 1; |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[5].b = dst.f & 1; |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[6].b = dst.g & 1; |
| |
| /* 1-bit integers get truncated */ |
| _dst_val[7].b = dst.h & 1; |
| |
| |
| break; |
| } |
| case 8: { |
| |
| |
| |
| |
| const struct uint8_vec src0 = { |
| _src[0][0].u8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint8_vec src1 = { |
| _src[1][0].u8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint8_vec src2 = { |
| _src[2][0].u8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint8_vec src3 = { |
| _src[3][0].u8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint8_vec src4 = { |
| _src[4][0].u8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint8_vec src5 = { |
| _src[5][0].u8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint8_vec src6 = { |
| _src[6][0].u8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint8_vec src7 = { |
| _src[7][0].u8, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct uint8_vec dst; |
| |
| |
| dst.x = src0.x; |
| dst.y = src1.x; |
| dst.z = src2.x; |
| dst.w = src3.x; |
| dst.e = src4.x; |
| dst.f = src5.x; |
| dst.g = src6.x; |
| dst.h = src7.x; |
| |
| |
| _dst_val[0].u8 = dst.x; |
| |
| _dst_val[1].u8 = dst.y; |
| |
| _dst_val[2].u8 = dst.z; |
| |
| _dst_val[3].u8 = dst.w; |
| |
| _dst_val[4].u8 = dst.e; |
| |
| _dst_val[5].u8 = dst.f; |
| |
| _dst_val[6].u8 = dst.g; |
| |
| _dst_val[7].u8 = dst.h; |
| |
| |
| break; |
| } |
| case 16: { |
| |
| |
| |
| |
| const struct uint16_vec src0 = { |
| _src[0][0].u16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint16_vec src1 = { |
| _src[1][0].u16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint16_vec src2 = { |
| _src[2][0].u16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint16_vec src3 = { |
| _src[3][0].u16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint16_vec src4 = { |
| _src[4][0].u16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint16_vec src5 = { |
| _src[5][0].u16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint16_vec src6 = { |
| _src[6][0].u16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint16_vec src7 = { |
| _src[7][0].u16, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct uint16_vec dst; |
| |
| |
| dst.x = src0.x; |
| dst.y = src1.x; |
| dst.z = src2.x; |
| dst.w = src3.x; |
| dst.e = src4.x; |
| dst.f = src5.x; |
| dst.g = src6.x; |
| dst.h = src7.x; |
| |
| |
| _dst_val[0].u16 = dst.x; |
| |
| _dst_val[1].u16 = dst.y; |
| |
| _dst_val[2].u16 = dst.z; |
| |
| _dst_val[3].u16 = dst.w; |
| |
| _dst_val[4].u16 = dst.e; |
| |
| _dst_val[5].u16 = dst.f; |
| |
| _dst_val[6].u16 = dst.g; |
| |
| _dst_val[7].u16 = dst.h; |
| |
| |
| break; |
| } |
| case 32: { |
| |
| |
| |
| |
| const struct uint32_vec src0 = { |
| _src[0][0].u32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint32_vec src1 = { |
| _src[1][0].u32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint32_vec src2 = { |
| _src[2][0].u32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint32_vec src3 = { |
| _src[3][0].u32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint32_vec src4 = { |
| _src[4][0].u32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint32_vec src5 = { |
| _src[5][0].u32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint32_vec src6 = { |
| _src[6][0].u32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint32_vec src7 = { |
| _src[7][0].u32, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct uint32_vec dst; |
| |
| |
| dst.x = src0.x; |
| dst.y = src1.x; |
| dst.z = src2.x; |
| dst.w = src3.x; |
| dst.e = src4.x; |
| dst.f = src5.x; |
| dst.g = src6.x; |
| dst.h = src7.x; |
| |
| |
| _dst_val[0].u32 = dst.x; |
| |
| _dst_val[1].u32 = dst.y; |
| |
| _dst_val[2].u32 = dst.z; |
| |
| _dst_val[3].u32 = dst.w; |
| |
| _dst_val[4].u32 = dst.e; |
| |
| _dst_val[5].u32 = dst.f; |
| |
| _dst_val[6].u32 = dst.g; |
| |
| _dst_val[7].u32 = dst.h; |
| |
| |
| break; |
| } |
| case 64: { |
| |
| |
| |
| |
| const struct uint64_vec src0 = { |
| _src[0][0].u64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint64_vec src1 = { |
| _src[1][0].u64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint64_vec src2 = { |
| _src[2][0].u64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint64_vec src3 = { |
| _src[3][0].u64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint64_vec src4 = { |
| _src[4][0].u64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint64_vec src5 = { |
| _src[5][0].u64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint64_vec src6 = { |
| _src[6][0].u64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| const struct uint64_vec src7 = { |
| _src[7][0].u64, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| }; |
| |
| struct uint64_vec dst; |
| |
| |
| dst.x = src0.x; |
| dst.y = src1.x; |
| dst.z = src2.x; |
| dst.w = src3.x; |
| dst.e = src4.x; |
| dst.f = src5.x; |
| dst.g = src6.x; |
| dst.h = src7.x; |
| |
| |
| _dst_val[0].u64 = dst.x; |
| |
| _dst_val[1].u64 = dst.y; |
| |
| _dst_val[2].u64 = dst.z; |
| |
| _dst_val[3].u64 = dst.w; |
| |
| _dst_val[4].u64 = dst.e; |
| |
| _dst_val[5].u64 = dst.f; |
| |
| _dst_val[6].u64 = dst.g; |
| |
| _dst_val[7].u64 = dst.h; |
| |
| |
| break; |
| } |
| |
| default: |
| unreachable("unknown bit width"); |
| } |
| } |
| |
| void |
| nir_eval_const_opcode(nir_op op, nir_const_value *dest, |
| unsigned num_components, unsigned bit_width, |
| nir_const_value **src, |
| unsigned float_controls_execution_mode) |
| { |
| switch (op) { |
| case nir_op_amul: |
| evaluate_amul(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b16all_fequal16: |
| evaluate_b16all_fequal16(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b16all_fequal2: |
| evaluate_b16all_fequal2(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b16all_fequal3: |
| evaluate_b16all_fequal3(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b16all_fequal4: |
| evaluate_b16all_fequal4(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b16all_fequal8: |
| evaluate_b16all_fequal8(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b16all_iequal16: |
| evaluate_b16all_iequal16(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b16all_iequal2: |
| evaluate_b16all_iequal2(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b16all_iequal3: |
| evaluate_b16all_iequal3(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b16all_iequal4: |
| evaluate_b16all_iequal4(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b16all_iequal8: |
| evaluate_b16all_iequal8(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b16any_fnequal16: |
| evaluate_b16any_fnequal16(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b16any_fnequal2: |
| evaluate_b16any_fnequal2(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b16any_fnequal3: |
| evaluate_b16any_fnequal3(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b16any_fnequal4: |
| evaluate_b16any_fnequal4(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b16any_fnequal8: |
| evaluate_b16any_fnequal8(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b16any_inequal16: |
| evaluate_b16any_inequal16(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b16any_inequal2: |
| evaluate_b16any_inequal2(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b16any_inequal3: |
| evaluate_b16any_inequal3(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b16any_inequal4: |
| evaluate_b16any_inequal4(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b16any_inequal8: |
| evaluate_b16any_inequal8(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b16csel: |
| evaluate_b16csel(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b2b1: |
| evaluate_b2b1(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b2b16: |
| evaluate_b2b16(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b2b32: |
| evaluate_b2b32(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b2b8: |
| evaluate_b2b8(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b2f16: |
| evaluate_b2f16(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b2f32: |
| evaluate_b2f32(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b2f64: |
| evaluate_b2f64(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b2i1: |
| evaluate_b2i1(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b2i16: |
| evaluate_b2i16(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b2i32: |
| evaluate_b2i32(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b2i64: |
| evaluate_b2i64(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b2i8: |
| evaluate_b2i8(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b32all_fequal16: |
| evaluate_b32all_fequal16(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b32all_fequal2: |
| evaluate_b32all_fequal2(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b32all_fequal3: |
| evaluate_b32all_fequal3(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b32all_fequal4: |
| evaluate_b32all_fequal4(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b32all_fequal8: |
| evaluate_b32all_fequal8(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b32all_iequal16: |
| evaluate_b32all_iequal16(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b32all_iequal2: |
| evaluate_b32all_iequal2(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b32all_iequal3: |
| evaluate_b32all_iequal3(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b32all_iequal4: |
| evaluate_b32all_iequal4(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b32all_iequal8: |
| evaluate_b32all_iequal8(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b32any_fnequal16: |
| evaluate_b32any_fnequal16(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b32any_fnequal2: |
| evaluate_b32any_fnequal2(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b32any_fnequal3: |
| evaluate_b32any_fnequal3(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b32any_fnequal4: |
| evaluate_b32any_fnequal4(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b32any_fnequal8: |
| evaluate_b32any_fnequal8(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b32any_inequal16: |
| evaluate_b32any_inequal16(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b32any_inequal2: |
| evaluate_b32any_inequal2(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b32any_inequal3: |
| evaluate_b32any_inequal3(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b32any_inequal4: |
| evaluate_b32any_inequal4(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b32any_inequal8: |
| evaluate_b32any_inequal8(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b32csel: |
| evaluate_b32csel(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b8all_fequal16: |
| evaluate_b8all_fequal16(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b8all_fequal2: |
| evaluate_b8all_fequal2(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b8all_fequal3: |
| evaluate_b8all_fequal3(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b8all_fequal4: |
| evaluate_b8all_fequal4(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b8all_fequal8: |
| evaluate_b8all_fequal8(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b8all_iequal16: |
| evaluate_b8all_iequal16(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b8all_iequal2: |
| evaluate_b8all_iequal2(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b8all_iequal3: |
| evaluate_b8all_iequal3(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b8all_iequal4: |
| evaluate_b8all_iequal4(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b8all_iequal8: |
| evaluate_b8all_iequal8(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b8any_fnequal16: |
| evaluate_b8any_fnequal16(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b8any_fnequal2: |
| evaluate_b8any_fnequal2(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b8any_fnequal3: |
| evaluate_b8any_fnequal3(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b8any_fnequal4: |
| evaluate_b8any_fnequal4(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b8any_fnequal8: |
| evaluate_b8any_fnequal8(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b8any_inequal16: |
| evaluate_b8any_inequal16(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b8any_inequal2: |
| evaluate_b8any_inequal2(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b8any_inequal3: |
| evaluate_b8any_inequal3(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b8any_inequal4: |
| evaluate_b8any_inequal4(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b8any_inequal8: |
| evaluate_b8any_inequal8(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_b8csel: |
| evaluate_b8csel(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_ball_fequal16: |
| evaluate_ball_fequal16(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_ball_fequal2: |
| evaluate_ball_fequal2(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_ball_fequal3: |
| evaluate_ball_fequal3(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_ball_fequal4: |
| evaluate_ball_fequal4(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_ball_fequal8: |
| evaluate_ball_fequal8(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_ball_iequal16: |
| evaluate_ball_iequal16(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_ball_iequal2: |
| evaluate_ball_iequal2(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_ball_iequal3: |
| evaluate_ball_iequal3(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_ball_iequal4: |
| evaluate_ball_iequal4(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_ball_iequal8: |
| evaluate_ball_iequal8(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_bany_fnequal16: |
| evaluate_bany_fnequal16(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_bany_fnequal2: |
| evaluate_bany_fnequal2(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_bany_fnequal3: |
| evaluate_bany_fnequal3(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_bany_fnequal4: |
| evaluate_bany_fnequal4(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_bany_fnequal8: |
| evaluate_bany_fnequal8(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_bany_inequal16: |
| evaluate_bany_inequal16(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_bany_inequal2: |
| evaluate_bany_inequal2(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_bany_inequal3: |
| evaluate_bany_inequal3(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_bany_inequal4: |
| evaluate_bany_inequal4(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_bany_inequal8: |
| evaluate_bany_inequal8(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_bcsel: |
| evaluate_bcsel(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_bfi: |
| evaluate_bfi(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_bfm: |
| evaluate_bfm(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_bit_count: |
| evaluate_bit_count(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_bitfield_insert: |
| evaluate_bitfield_insert(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_bitfield_reverse: |
| evaluate_bitfield_reverse(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_bitfield_select: |
| evaluate_bitfield_select(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_cube_face_coord: |
| evaluate_cube_face_coord(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_cube_face_index: |
| evaluate_cube_face_index(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_extract_i16: |
| evaluate_extract_i16(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_extract_i8: |
| evaluate_extract_i8(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_extract_u16: |
| evaluate_extract_u16(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_extract_u8: |
| evaluate_extract_u8(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_f2b1: |
| evaluate_f2b1(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_f2b16: |
| evaluate_f2b16(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_f2b32: |
| evaluate_f2b32(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_f2b8: |
| evaluate_f2b8(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_f2f16: |
| evaluate_f2f16(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_f2f16_rtne: |
| evaluate_f2f16_rtne(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_f2f16_rtz: |
| evaluate_f2f16_rtz(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_f2f32: |
| evaluate_f2f32(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_f2f64: |
| evaluate_f2f64(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_f2fmp: |
| evaluate_f2fmp(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_f2i1: |
| evaluate_f2i1(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_f2i16: |
| evaluate_f2i16(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_f2i32: |
| evaluate_f2i32(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_f2i64: |
| evaluate_f2i64(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_f2i8: |
| evaluate_f2i8(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_f2imp: |
| evaluate_f2imp(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_f2u1: |
| evaluate_f2u1(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_f2u16: |
| evaluate_f2u16(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_f2u32: |
| evaluate_f2u32(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_f2u64: |
| evaluate_f2u64(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_f2u8: |
| evaluate_f2u8(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_f2ump: |
| evaluate_f2ump(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_fabs: |
| evaluate_fabs(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_fadd: |
| evaluate_fadd(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_fall_equal16: |
| evaluate_fall_equal16(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_fall_equal2: |
| evaluate_fall_equal2(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_fall_equal3: |
| evaluate_fall_equal3(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_fall_equal4: |
| evaluate_fall_equal4(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_fall_equal8: |
| evaluate_fall_equal8(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_fany_nequal16: |
| evaluate_fany_nequal16(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_fany_nequal2: |
| evaluate_fany_nequal2(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_fany_nequal3: |
| evaluate_fany_nequal3(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_fany_nequal4: |
| evaluate_fany_nequal4(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_fany_nequal8: |
| evaluate_fany_nequal8(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_fceil: |
| evaluate_fceil(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_fclamp_pos: |
| evaluate_fclamp_pos(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_fcos: |
| evaluate_fcos(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_fcsel: |
| evaluate_fcsel(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_fddx: |
| evaluate_fddx(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_fddx_coarse: |
| evaluate_fddx_coarse(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_fddx_fine: |
| evaluate_fddx_fine(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_fddy: |
| evaluate_fddy(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_fddy_coarse: |
| evaluate_fddy_coarse(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_fddy_fine: |
| evaluate_fddy_fine(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_fdiv: |
| evaluate_fdiv(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_fdot16: |
| evaluate_fdot16(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_fdot16_replicated: |
| evaluate_fdot16_replicated(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_fdot2: |
| evaluate_fdot2(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_fdot2_replicated: |
| evaluate_fdot2_replicated(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_fdot3: |
| evaluate_fdot3(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_fdot3_replicated: |
| evaluate_fdot3_replicated(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_fdot4: |
| evaluate_fdot4(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_fdot4_replicated: |
| evaluate_fdot4_replicated(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_fdot8: |
| evaluate_fdot8(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_fdot8_replicated: |
| evaluate_fdot8_replicated(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_fdph: |
| evaluate_fdph(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_fdph_replicated: |
| evaluate_fdph_replicated(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_feq: |
| evaluate_feq(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_feq16: |
| evaluate_feq16(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_feq32: |
| evaluate_feq32(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_feq8: |
| evaluate_feq8(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_fexp2: |
| evaluate_fexp2(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_ffloor: |
| evaluate_ffloor(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_ffma: |
| evaluate_ffma(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_ffract: |
| evaluate_ffract(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_fge: |
| evaluate_fge(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_fge16: |
| evaluate_fge16(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_fge32: |
| evaluate_fge32(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_fge8: |
| evaluate_fge8(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_find_lsb: |
| evaluate_find_lsb(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_fisfinite: |
| evaluate_fisfinite(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_fisnormal: |
| evaluate_fisnormal(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_flog2: |
| evaluate_flog2(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_flrp: |
| evaluate_flrp(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_flt: |
| evaluate_flt(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_flt16: |
| evaluate_flt16(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_flt32: |
| evaluate_flt32(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_flt8: |
| evaluate_flt8(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_fmax: |
| evaluate_fmax(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_fmin: |
| evaluate_fmin(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_fmod: |
| evaluate_fmod(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_fmul: |
| evaluate_fmul(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_fneg: |
| evaluate_fneg(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_fneu: |
| evaluate_fneu(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_fneu16: |
| evaluate_fneu16(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_fneu32: |
| evaluate_fneu32(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_fneu8: |
| evaluate_fneu8(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_fpow: |
| evaluate_fpow(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_fquantize2f16: |
| evaluate_fquantize2f16(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_frcp: |
| evaluate_frcp(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_frem: |
| evaluate_frem(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_frexp_exp: |
| evaluate_frexp_exp(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_frexp_sig: |
| evaluate_frexp_sig(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_fround_even: |
| evaluate_fround_even(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_frsq: |
| evaluate_frsq(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_fsat: |
| evaluate_fsat(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_fsat_signed: |
| evaluate_fsat_signed(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_fsign: |
| evaluate_fsign(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_fsin: |
| evaluate_fsin(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_fsqrt: |
| evaluate_fsqrt(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_fsub: |
| evaluate_fsub(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_fsum2: |
| evaluate_fsum2(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_fsum3: |
| evaluate_fsum3(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_fsum4: |
| evaluate_fsum4(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_ftrunc: |
| evaluate_ftrunc(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_i2b1: |
| evaluate_i2b1(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_i2b16: |
| evaluate_i2b16(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_i2b32: |
| evaluate_i2b32(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_i2b8: |
| evaluate_i2b8(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_i2f16: |
| evaluate_i2f16(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_i2f32: |
| evaluate_i2f32(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_i2f64: |
| evaluate_i2f64(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_i2fmp: |
| evaluate_i2fmp(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_i2i1: |
| evaluate_i2i1(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_i2i16: |
| evaluate_i2i16(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_i2i32: |
| evaluate_i2i32(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_i2i64: |
| evaluate_i2i64(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_i2i8: |
| evaluate_i2i8(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_i2imp: |
| evaluate_i2imp(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_iabs: |
| evaluate_iabs(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_iadd: |
| evaluate_iadd(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_iadd_sat: |
| evaluate_iadd_sat(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_iand: |
| evaluate_iand(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_ibfe: |
| evaluate_ibfe(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_ibitfield_extract: |
| evaluate_ibitfield_extract(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_idiv: |
| evaluate_idiv(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_ieq: |
| evaluate_ieq(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_ieq16: |
| evaluate_ieq16(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_ieq32: |
| evaluate_ieq32(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_ieq8: |
| evaluate_ieq8(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_ifind_msb: |
| evaluate_ifind_msb(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_ige: |
| evaluate_ige(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_ige16: |
| evaluate_ige16(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_ige32: |
| evaluate_ige32(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_ige8: |
| evaluate_ige8(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_ihadd: |
| evaluate_ihadd(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_ilt: |
| evaluate_ilt(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_ilt16: |
| evaluate_ilt16(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_ilt32: |
| evaluate_ilt32(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_ilt8: |
| evaluate_ilt8(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_imad24_ir3: |
| evaluate_imad24_ir3(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_imadsh_mix16: |
| evaluate_imadsh_mix16(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_imax: |
| evaluate_imax(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_imin: |
| evaluate_imin(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_imod: |
| evaluate_imod(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_imul: |
| evaluate_imul(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_imul24: |
| evaluate_imul24(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_imul_2x32_64: |
| evaluate_imul_2x32_64(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_imul_32x16: |
| evaluate_imul_32x16(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_imul_high: |
| evaluate_imul_high(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_ine: |
| evaluate_ine(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_ine16: |
| evaluate_ine16(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_ine32: |
| evaluate_ine32(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_ine8: |
| evaluate_ine8(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_ineg: |
| evaluate_ineg(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_inot: |
| evaluate_inot(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_ior: |
| evaluate_ior(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_irem: |
| evaluate_irem(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_irhadd: |
| evaluate_irhadd(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_ishl: |
| evaluate_ishl(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_ishr: |
| evaluate_ishr(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_isign: |
| evaluate_isign(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_isub: |
| evaluate_isub(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_isub_sat: |
| evaluate_isub_sat(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_ixor: |
| evaluate_ixor(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_ldexp: |
| evaluate_ldexp(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_mov: |
| evaluate_mov(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_pack_32_2x16: |
| evaluate_pack_32_2x16(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_pack_32_2x16_split: |
| evaluate_pack_32_2x16_split(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_pack_32_4x8: |
| evaluate_pack_32_4x8(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_pack_64_2x32: |
| evaluate_pack_64_2x32(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_pack_64_2x32_split: |
| evaluate_pack_64_2x32_split(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_pack_64_4x16: |
| evaluate_pack_64_4x16(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_pack_half_2x16: |
| evaluate_pack_half_2x16(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_pack_half_2x16_split: |
| evaluate_pack_half_2x16_split(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_pack_snorm_2x16: |
| evaluate_pack_snorm_2x16(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_pack_snorm_4x8: |
| evaluate_pack_snorm_4x8(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_pack_unorm_2x16: |
| evaluate_pack_unorm_2x16(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_pack_unorm_4x8: |
| evaluate_pack_unorm_4x8(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_pack_uvec2_to_uint: |
| evaluate_pack_uvec2_to_uint(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_pack_uvec4_to_uint: |
| evaluate_pack_uvec4_to_uint(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_seq: |
| evaluate_seq(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_sge: |
| evaluate_sge(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_slt: |
| evaluate_slt(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_sne: |
| evaluate_sne(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_u2f16: |
| evaluate_u2f16(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_u2f32: |
| evaluate_u2f32(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_u2f64: |
| evaluate_u2f64(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_u2fmp: |
| evaluate_u2fmp(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_u2u1: |
| evaluate_u2u1(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_u2u16: |
| evaluate_u2u16(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_u2u32: |
| evaluate_u2u32(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_u2u64: |
| evaluate_u2u64(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_u2u8: |
| evaluate_u2u8(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_uabs_isub: |
| evaluate_uabs_isub(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_uabs_usub: |
| evaluate_uabs_usub(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_uadd_carry: |
| evaluate_uadd_carry(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_uadd_sat: |
| evaluate_uadd_sat(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_ubfe: |
| evaluate_ubfe(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_ubitfield_extract: |
| evaluate_ubitfield_extract(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_uclz: |
| evaluate_uclz(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_udiv: |
| evaluate_udiv(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_ufind_msb: |
| evaluate_ufind_msb(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_uge: |
| evaluate_uge(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_uge16: |
| evaluate_uge16(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_uge32: |
| evaluate_uge32(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_uge8: |
| evaluate_uge8(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_uhadd: |
| evaluate_uhadd(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_ult: |
| evaluate_ult(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_ult16: |
| evaluate_ult16(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_ult32: |
| evaluate_ult32(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_ult8: |
| evaluate_ult8(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_umad24: |
| evaluate_umad24(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_umax: |
| evaluate_umax(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_umax_4x8: |
| evaluate_umax_4x8(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_umin: |
| evaluate_umin(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_umin_4x8: |
| evaluate_umin_4x8(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_umod: |
| evaluate_umod(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_umul24: |
| evaluate_umul24(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_umul_2x32_64: |
| evaluate_umul_2x32_64(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_umul_32x16: |
| evaluate_umul_32x16(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_umul_high: |
| evaluate_umul_high(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_umul_low: |
| evaluate_umul_low(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_umul_unorm_4x8: |
| evaluate_umul_unorm_4x8(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_unpack_32_2x16: |
| evaluate_unpack_32_2x16(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_unpack_32_2x16_split_x: |
| evaluate_unpack_32_2x16_split_x(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_unpack_32_2x16_split_y: |
| evaluate_unpack_32_2x16_split_y(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_unpack_32_4x8: |
| evaluate_unpack_32_4x8(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_unpack_64_2x32: |
| evaluate_unpack_64_2x32(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_unpack_64_2x32_split_x: |
| evaluate_unpack_64_2x32_split_x(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_unpack_64_2x32_split_y: |
| evaluate_unpack_64_2x32_split_y(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_unpack_64_4x16: |
| evaluate_unpack_64_4x16(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_unpack_half_2x16: |
| evaluate_unpack_half_2x16(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_unpack_half_2x16_flush_to_zero: |
| evaluate_unpack_half_2x16_flush_to_zero(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_unpack_half_2x16_split_x: |
| evaluate_unpack_half_2x16_split_x(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_unpack_half_2x16_split_x_flush_to_zero: |
| evaluate_unpack_half_2x16_split_x_flush_to_zero(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_unpack_half_2x16_split_y: |
| evaluate_unpack_half_2x16_split_y(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_unpack_half_2x16_split_y_flush_to_zero: |
| evaluate_unpack_half_2x16_split_y_flush_to_zero(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_unpack_snorm_2x16: |
| evaluate_unpack_snorm_2x16(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_unpack_snorm_4x8: |
| evaluate_unpack_snorm_4x8(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_unpack_unorm_2x16: |
| evaluate_unpack_unorm_2x16(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_unpack_unorm_4x8: |
| evaluate_unpack_unorm_4x8(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_urhadd: |
| evaluate_urhadd(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_urol: |
| evaluate_urol(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_uror: |
| evaluate_uror(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_usadd_4x8: |
| evaluate_usadd_4x8(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_ushr: |
| evaluate_ushr(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_ussub_4x8: |
| evaluate_ussub_4x8(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_usub_borrow: |
| evaluate_usub_borrow(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_usub_sat: |
| evaluate_usub_sat(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_vec16: |
| evaluate_vec16(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_vec2: |
| evaluate_vec2(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_vec3: |
| evaluate_vec3(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_vec4: |
| evaluate_vec4(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| case nir_op_vec8: |
| evaluate_vec8(dest, num_components, bit_width, src, float_controls_execution_mode); |
| return; |
| default: |
| unreachable("shouldn't get here"); |
| } |
| } |