Add fp16_ieee_to_fp32x2_psimd and fp16_alt_to_fp32x2_psimd (8-lane FP16-to-FP32 conversions), with benchmarks and unit tests
diff --git a/bench/alt-array.cc b/bench/alt-array.cc
index 81f55f0..2ea6fc7 100644
--- a/bench/alt-array.cc
+++ b/bench/alt-array.cc
@@ -32,7 +32,7 @@
}
state.SetItemsProcessed(int64_t(state.iterations()) * int64_t(state.range(0)));
}
-BENCHMARK(fp16_alt_to_fp32_bits)->RangeMultiplier(2)->Range(1<<10, 1<<20);;
+BENCHMARK(fp16_alt_to_fp32_bits)->RangeMultiplier(2)->Range(1<<10, 1<<20);
static void fp16_alt_to_fp32_value(benchmark::State& state) {
const uint_fast32_t seed = std::chrono::system_clock::now().time_since_epoch().count();
@@ -56,7 +56,7 @@
}
state.SetItemsProcessed(int64_t(state.iterations()) * int64_t(state.range(0)));
}
-BENCHMARK(fp16_alt_to_fp32_value)->RangeMultiplier(2)->Range(1<<10, 1<<20);;
+BENCHMARK(fp16_alt_to_fp32_value)->RangeMultiplier(2)->Range(1<<10, 1<<20);
static void fp16_alt_to_fp32_psimd(benchmark::State& state) {
const uint_fast32_t seed = std::chrono::system_clock::now().time_since_epoch().count();
@@ -85,6 +85,34 @@
}
state.SetItemsProcessed(int64_t(state.iterations()) * int64_t(state.range(0)));
}
-BENCHMARK(fp16_alt_to_fp32_psimd)->RangeMultiplier(2)->Range(1<<10, 1<<20);;
+BENCHMARK(fp16_alt_to_fp32_psimd)->RangeMultiplier(2)->Range(1<<10, 1<<20);
+
+static void fp16_alt_to_fp32x2_psimd(benchmark::State& state) { // Array benchmark: converts state.range(0) halves, 8 per fp16_alt_to_fp32x2_psimd call.
+ const uint_fast32_t seed = std::chrono::system_clock::now().time_since_epoch().count();
+ auto rng = std::bind(std::uniform_int_distribution<uint16_t>(0, 0x7BFF), std::mt19937(seed));
+ // Input and output buffers hold state.range(0) elements each; the input is filled with clock-seeded random bit patterns in [0, 0x7BFF].
+ std::vector<uint16_t> fp16(state.range(0));
+ std::vector<float> fp32(state.range(0));
+ std::generate(fp16.begin(), fp16.end(), std::ref(rng));
+ // Every benchmark iteration converts the whole input array once.
+ while (state.KeepRunning()) {
+ uint16_t* input = fp16.data();
+ benchmark::DoNotOptimize(input);
+ // Each step converts 8 elements: .lo is stored at &output[i], .hi at &output[i + 4]. Assumes n is a multiple of 8 (the registered sizes are powers of two from 1<<10).
+ float* output = fp32.data();
+ const size_t n = state.range(0);
+ for (size_t i = 0; i < n; i += 8) {
+ const psimd_f32x2 data =
+ fp16_alt_to_fp32x2_psimd(
+ psimd_load_u16(&input[i]));
+ psimd_store_f32(&output[i], data.lo);
+ psimd_store_f32(&output[i + 4], data.hi);
+ }
+ // Keep the output pointer observable so the stores above are not optimized away.
+ benchmark::DoNotOptimize(output);
+ }
+ state.SetItemsProcessed(int64_t(state.iterations()) * int64_t(state.range(0))); // Items processed = iterations * elements per iteration.
+}
+BENCHMARK(fp16_alt_to_fp32x2_psimd)->RangeMultiplier(2)->Range(1<<10, 1<<20);
BENCHMARK_MAIN();
diff --git a/bench/alt-element.cc b/bench/alt-element.cc
index 2d3772c..27e454c 100644
--- a/bench/alt-element.cc
+++ b/bench/alt-element.cc
@@ -22,7 +22,7 @@
BENCHMARK(fp16_alt_to_fp32_value);
static void fp16_alt_to_fp32_psimd(benchmark::State& state) {
- psimd_u16 fp16 = (psimd_u16) { 0x7C00, 0x7C01, 0x7C02, 0x7C03 };;
+ psimd_u16 fp16 = (psimd_u16) { 0x7C00, 0x7C01, 0x7C02, 0x7C03 };
const psimd_u16 increment = psimd_splat_u16(4);
while (state.KeepRunning()) {
const psimd_f32 fp32 = fp16_alt_to_fp32_psimd(fp16);
@@ -32,6 +32,18 @@
}
BENCHMARK(fp16_alt_to_fp32_psimd);
+static void fp16_alt_to_fp32x2_psimd(benchmark::State& state) { // Element benchmark: one 8-lane fp16_alt_to_fp32x2_psimd call per iteration.
+ psimd_u16 fp16 =
+ (psimd_u16) { 0x7C00, 0x7C01, 0x7C02, 0x7C03, 0x7C04, 0x7C05, 0x7C06, 0x7C07 };
+ const psimd_u16 increment = psimd_splat_u16(8); // Step added to fp16 each iteration; 8 matches the number of lanes initialized above.
+ while (state.KeepRunning()) {
+ const psimd_f32x2 fp32 = fp16_alt_to_fp32x2_psimd(fp16);
+ fp16 += increment; // Change the input between iterations so the call cannot be hoisted out of the loop.
+ benchmark::DoNotOptimize(fp32); // Keep the result observable so the conversion is not eliminated.
+ }
+}
+BENCHMARK(fp16_alt_to_fp32x2_psimd);
+
static void fp16_alt_from_fp32_value(benchmark::State& state) {
uint32_t fp32 = UINT32_C(0x7F800000);
while (state.KeepRunning()) {
diff --git a/bench/ieee-array.cc b/bench/ieee-array.cc
index 6fe45c1..7e0430c 100644
--- a/bench/ieee-array.cc
+++ b/bench/ieee-array.cc
@@ -91,6 +91,34 @@
}
BENCHMARK(fp16_ieee_to_fp32_psimd)->RangeMultiplier(2)->Range(1<<10, 1<<20);
+static void fp16_ieee_to_fp32x2_psimd(benchmark::State& state) { // Array benchmark: converts state.range(0) halves, 8 per fp16_ieee_to_fp32x2_psimd call.
+ const uint_fast32_t seed = std::chrono::system_clock::now().time_since_epoch().count();
+ auto rng = std::bind(std::uniform_int_distribution<uint16_t>(0, 0x7BFF), std::mt19937(seed));
+ // Input and output buffers hold state.range(0) elements each; the input is filled with clock-seeded random bit patterns in [0, 0x7BFF].
+ std::vector<uint16_t> fp16(state.range(0));
+ std::vector<float> fp32(state.range(0));
+ std::generate(fp16.begin(), fp16.end(), std::ref(rng));
+ // Every benchmark iteration converts the whole input array once.
+ while (state.KeepRunning()) {
+ uint16_t* input = fp16.data();
+ benchmark::DoNotOptimize(input);
+ // Each step converts 8 elements: .lo is stored at &output[i], .hi at &output[i + 4]. Assumes n is a multiple of 8 (the registered sizes are powers of two from 1<<10).
+ float* output = fp32.data();
+ const size_t n = state.range(0);
+ for (size_t i = 0; i < n; i += 8) {
+ const psimd_f32x2 data =
+ fp16_ieee_to_fp32x2_psimd(
+ psimd_load_u16(&input[i]));
+ psimd_store_f32(&output[i], data.lo);
+ psimd_store_f32(&output[i + 4], data.hi);
+ }
+ // Keep the output pointer observable so the stores above are not optimized away.
+ benchmark::DoNotOptimize(output);
+ }
+ state.SetItemsProcessed(int64_t(state.iterations()) * int64_t(state.range(0))); // Items processed = iterations * elements per iteration.
+}
+BENCHMARK(fp16_ieee_to_fp32x2_psimd)->RangeMultiplier(2)->Range(1<<10, 1<<20);
+
#if (defined(__i386__) || defined(__x86_64__)) && defined(__F16C__)
static void fp16_ieee_to_fp32_hardware(benchmark::State& state) {
const uint_fast32_t seed = std::chrono::system_clock::now().time_since_epoch().count();
diff --git a/bench/ieee-element.cc b/bench/ieee-element.cc
index bd35b33..dd74209 100644
--- a/bench/ieee-element.cc
+++ b/bench/ieee-element.cc
@@ -27,7 +27,7 @@
BENCHMARK(fp16_ieee_to_fp32_value);
static void fp16_ieee_to_fp32_psimd(benchmark::State& state) {
- psimd_u16 fp16 = (psimd_u16) { 0x7C00, 0x7C01, 0x7C02, 0x7C03 };;
+ psimd_u16 fp16 = (psimd_u16) { 0x7C00, 0x7C01, 0x7C02, 0x7C03 };
const psimd_u16 increment = psimd_splat_u16(4);
while (state.KeepRunning()) {
const psimd_f32 fp32 = fp16_ieee_to_fp32_psimd(fp16);
@@ -37,6 +37,18 @@
}
BENCHMARK(fp16_ieee_to_fp32_psimd);
+static void fp16_ieee_to_fp32x2_psimd(benchmark::State& state) { // Element benchmark: one 8-lane fp16_ieee_to_fp32x2_psimd call per iteration.
+ psimd_u16 fp16 =
+ (psimd_u16) { 0x7C00, 0x7C01, 0x7C02, 0x7C03, 0x7C04, 0x7C05, 0x7C06, 0x7C07 };
+ const psimd_u16 increment = psimd_splat_u16(8); // Step added to fp16 each iteration; 8 matches the number of lanes initialized above.
+ while (state.KeepRunning()) {
+ const psimd_f32x2 fp32 = fp16_ieee_to_fp32x2_psimd(fp16);
+ fp16 += increment; // Change the input between iterations so the call cannot be hoisted out of the loop.
+ benchmark::DoNotOptimize(fp32); // Keep the result observable so the conversion is not eliminated.
+ }
+}
+BENCHMARK(fp16_ieee_to_fp32x2_psimd);
+
static void fp16_ieee_from_fp32_value(benchmark::State& state) {
uint32_t fp32 = UINT32_C(0x7F800000);
while (state.KeepRunning()) {
diff --git a/configure.py b/configure.py
index 0aecc96..df226d3 100755
--- a/configure.py
+++ b/configure.py
@@ -30,6 +30,9 @@
build.unittest("ieee-to-fp32-psimd", build.cxx("ieee-to-fp32-psimd.cc"))
build.unittest("alt-to-fp32-psimd", build.cxx("alt-to-fp32-psimd.cc"))
+ build.unittest("ieee-to-fp32x2-psimd", build.cxx("ieee-to-fp32x2-psimd.cc"))
+ build.unittest("alt-to-fp32x2-psimd", build.cxx("alt-to-fp32x2-psimd.cc"))
+
build.unittest("bitcasts", build.cxx("bitcasts.cc"))
with build.options(source_dir="bench", deps=[build.deps.googlebenchmark, build.deps.psimd]):
diff --git a/include/fp16/psimd.h b/include/fp16/psimd.h
index bec8756..2e1425c 100644
--- a/include/fp16/psimd.h
+++ b/include/fp16/psimd.h
@@ -32,6 +32,37 @@
return (psimd_f32) (sign | (psimd_s32) psimd_blend_f32(denorm_mask, denorm_nonsign, norm_nonsign));
}
+PSIMD_INTRINSIC psimd_f32x2 fp16_ieee_to_fp32x2_psimd(psimd_u16 half) { // Converts the 16-bit lanes of `half` to FP32, returned as a .lo/.hi pair of psimd_f32 vectors.
+ const psimd_u32 word_lo = (psimd_u32) psimd_unpacklo_u16(psimd_zero_u16(), half);
+ const psimd_u32 word_hi = (psimd_u32) psimd_unpackhi_u16(psimd_zero_u16(), half);
+ // NOTE(review): unpacking with a zero vector presumably puts each half in the upper 16 bits of a 32-bit lane, which the 0x80000000 sign mask below relies on -- confirm against psimd.
+ const psimd_u32 sign_mask = psimd_splat_u32(UINT32_C(0x80000000));
+ const psimd_u32 sign_lo = word_lo & sign_mask;
+ const psimd_u32 sign_hi = word_hi & sign_mask;
+ const psimd_u32 shr3_nonsign_lo = (word_lo + word_lo) >> psimd_splat_u32(4); // word + word shifts left by 1 (the top bit wraps away); >> 4 then leaves a net right shift by 3.
+ const psimd_u32 shr3_nonsign_hi = (word_hi + word_hi) >> psimd_splat_u32(4);
+ // Normalized path: add 0x70000000 to the shifted bits, reinterpret them as float, then scale by 2**-112.
+ const psimd_u32 exp_offset = psimd_splat_u32(UINT32_C(0x70000000));
+ const psimd_f32 exp_scale = psimd_splat_f32(0x1.0p-112f);
+ const psimd_f32 norm_nonsign_lo = (psimd_f32) (shr3_nonsign_lo + exp_offset) * exp_scale;
+ const psimd_f32 norm_nonsign_hi = (psimd_f32) (shr3_nonsign_hi + exp_offset) * exp_scale;
+ // Denormal path: pair (half + half) with the 0x3E80 pattern per lane, reinterpret as float and subtract 0.25f. NOTE(review): 0x3E800000 is the bit pattern of 0.25f, assuming magic_mask lands in the upper 16 bits -- confirm.
+ const psimd_u16 magic_mask = psimd_splat_u16(UINT16_C(0x3E80));
+ const psimd_u16 shl1_half = half + half;
+ const psimd_f32 magic_bias = psimd_splat_f32(0.25f);
+ const psimd_f32 denorm_nonsign_lo = (psimd_f32) psimd_unpacklo_u16(shl1_half, magic_mask) - magic_bias;
+ const psimd_f32 denorm_nonsign_hi = (psimd_f32) psimd_unpackhi_u16(shl1_half, magic_mask) - magic_bias;
+ // Lanes whose shifted non-sign bits are below 0x00800000 (signed compare) are flagged in the denormal mask.
+ const psimd_s32 denorm_cutoff = psimd_splat_s32(INT32_C(0x00800000));
+ const psimd_s32 denorm_mask_lo = (psimd_s32) shr3_nonsign_lo < denorm_cutoff;
+ const psimd_s32 denorm_mask_hi = (psimd_s32) shr3_nonsign_hi < denorm_cutoff;
+ // Blend the two paths per lane (presumably the denormal value where the mask is set -- confirm psimd_blend_f32 argument order), then OR the sign bit back in.
+ psimd_f32x2 result;
+ result.lo = (psimd_f32) (sign_lo | (psimd_s32) psimd_blend_f32(denorm_mask_lo, denorm_nonsign_lo, norm_nonsign_lo));
+ result.hi = (psimd_f32) (sign_hi | (psimd_s32) psimd_blend_f32(denorm_mask_hi, denorm_nonsign_hi, norm_nonsign_hi));
+ return result;
+}
+
PSIMD_INTRINSIC psimd_f32 fp16_alt_to_fp32_psimd(psimd_u16 half) {
const psimd_u32 word = (psimd_u32) psimd_unpacklo_u16(psimd_zero_u16(), half);
@@ -50,4 +81,34 @@
return (psimd_f32) (sign | (psimd_s32) psimd_blend_f32(denorm_mask, denorm_nonsign, norm_nonsign));
}
+PSIMD_INTRINSIC psimd_f32x2 fp16_alt_to_fp32x2_psimd(psimd_u16 half) { // Converts the 16-bit lanes of `half` (alternative FP16 format) to FP32, returned as a .lo/.hi pair of psimd_f32 vectors.
+ const psimd_u32 word_lo = (psimd_u32) psimd_unpacklo_u16(psimd_zero_u16(), half);
+ const psimd_u32 word_hi = (psimd_u32) psimd_unpackhi_u16(psimd_zero_u16(), half);
+ // NOTE(review): unpacking with a zero vector presumably puts each half in the upper 16 bits of a 32-bit lane, which the 0x80000000 sign mask below relies on -- confirm against psimd.
+ const psimd_u32 sign_mask = psimd_splat_u32(UINT32_C(0x80000000));
+ const psimd_u32 sign_lo = word_lo & sign_mask;
+ const psimd_u32 sign_hi = word_hi & sign_mask;
+ const psimd_u32 shr3_nonsign_lo = (word_lo + word_lo) >> psimd_splat_u32(4); // word + word shifts left by 1 (the top bit wraps away); >> 4 then leaves a net right shift by 3.
+ const psimd_u32 shr3_nonsign_hi = (word_hi + word_hi) >> psimd_splat_u32(4);
+ // Normalized path: add 0x38000000 to the shifted bits and reinterpret them as float; no scale multiply is applied here (fp16_ieee_to_fp32x2_psimd multiplies by 2**-112).
+ const psimd_u32 exp_offset = psimd_splat_u32(UINT32_C(0x38000000));
+ const psimd_f32 norm_nonsign_lo = (psimd_f32) (shr3_nonsign_lo + exp_offset);
+ const psimd_f32 norm_nonsign_hi = (psimd_f32) (shr3_nonsign_hi + exp_offset);
+ // Denormal path: pair (half + half) with the 0x3E80 pattern per lane, reinterpret as float and subtract 0.25f. NOTE(review): 0x3E800000 is the bit pattern of 0.25f, assuming magic_mask lands in the upper 16 bits -- confirm.
+ const psimd_u16 magic_mask = psimd_splat_u16(UINT16_C(0x3E80));
+ const psimd_u16 shl1_half = half + half;
+ const psimd_f32 magic_bias = psimd_splat_f32(0.25f);
+ const psimd_f32 denorm_nonsign_lo = (psimd_f32) psimd_unpacklo_u16(shl1_half, magic_mask) - magic_bias;
+ const psimd_f32 denorm_nonsign_hi = (psimd_f32) psimd_unpackhi_u16(shl1_half, magic_mask) - magic_bias;
+ // Lanes whose shifted non-sign bits are below 0x00800000 (signed compare) are flagged in the denormal mask.
+ const psimd_s32 denorm_cutoff = psimd_splat_s32(INT32_C(0x00800000));
+ const psimd_s32 denorm_mask_lo = (psimd_s32) shr3_nonsign_lo < denorm_cutoff;
+ const psimd_s32 denorm_mask_hi = (psimd_s32) shr3_nonsign_hi < denorm_cutoff;
+ // Blend the two paths per lane (presumably the denormal value where the mask is set -- confirm psimd_blend_f32 argument order), then OR the sign bit back in.
+ psimd_f32x2 result;
+ result.lo = (psimd_f32) (sign_lo | (psimd_s32) psimd_blend_f32(denorm_mask_lo, denorm_nonsign_lo, norm_nonsign_lo));
+ result.hi = (psimd_f32) (sign_hi | (psimd_s32) psimd_blend_f32(denorm_mask_hi, denorm_nonsign_hi, norm_nonsign_hi));
+ return result;
+}
+
#endif /* FP16_PSIMD_H */
diff --git a/test/alt-to-fp32x2-psimd.cc b/test/alt-to-fp32x2-psimd.cc
new file mode 100644
index 0000000..b571006
--- /dev/null
+++ b/test/alt-to-fp32x2-psimd.cc
@@ -0,0 +1,245 @@
+#include <gtest/gtest.h>
+
+#include <cstdint>
+
+#include <fp16.h>
+#include <fp16/psimd.h>
+
+
+TEST(FP16_ALT_TO_FP32x2_PSIMD, positive_normalized_values) { // Compares every lane with scalar fp16_alt_to_fp32_bits for sign-clear inputs, exponents -14..16.
+ const uint32_t exponentBias = 15;
+ for (int32_t e = -14; e <= 16; e++) {
+ for (uint16_t h = 0; h < 0x0400; h += 8) { // Low 10 bits, 8 consecutive values per vector.
+ const psimd_u16 fp16 = {
+ (uint16_t) (h + ((e + exponentBias) << 10) + 0),
+ (uint16_t) (h + ((e + exponentBias) << 10) + 1),
+ (uint16_t) (h + ((e + exponentBias) << 10) + 2),
+ (uint16_t) (h + ((e + exponentBias) << 10) + 3),
+ (uint16_t) (h + ((e + exponentBias) << 10) + 4),
+ (uint16_t) (h + ((e + exponentBias) << 10) + 5),
+ (uint16_t) (h + ((e + exponentBias) << 10) + 6),
+ (uint16_t) (h + ((e + exponentBias) << 10) + 7)
+ };
+ const psimd_u32x2 fp32 =
+ psimd_cast_f32x2_u32x2(fp16_alt_to_fp32x2_psimd(fp16));
+ // Input lanes 0-3 are checked against fp32.lo and lanes 4-7 against fp32.hi; the bits must equal the scalar reference exactly.
+ EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[0]), fp32.lo[0]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[0] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.lo[0] << ", " <<
+ "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[0]);
+ EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[1]), fp32.lo[1]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[1] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.lo[1] << ", " <<
+ "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[1]);
+ EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[2]), fp32.lo[2]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[2] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.lo[2] << ", " <<
+ "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[2]);
+ EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[3]), fp32.lo[3]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[3] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.lo[3] << ", " <<
+ "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[3]);
+ EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[4]), fp32.hi[0]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[4] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.hi[0] << ", " <<
+ "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[4]);
+ EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[5]), fp32.hi[1]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[5] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.hi[1] << ", " <<
+ "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[5]);
+ EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[6]), fp32.hi[2]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[6] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.hi[2] << ", " <<
+ "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[6]);
+ EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[7]), fp32.hi[3]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[7] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.hi[3] << ", " <<
+ "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[7]);
+ }
+ }
+}
+
+TEST(FP16_ALT_TO_FP32x2_PSIMD, negative_normalized_values) { // Compares every lane with scalar fp16_alt_to_fp32_bits for inputs with bit 15 set, exponents -14..16.
+ const uint32_t exponentBias = 15;
+ for (int32_t e = -14; e <= 16; e++) {
+ for (uint16_t h = 0; h < 0x0400; h += 8) { // Low 10 bits, 8 consecutive values per vector.
+ const psimd_u16 fp16 = {
+ (uint16_t) (h + ((e + exponentBias) << 10) + 0x8000),
+ (uint16_t) (h + ((e + exponentBias) << 10) + 0x8001),
+ (uint16_t) (h + ((e + exponentBias) << 10) + 0x8002),
+ (uint16_t) (h + ((e + exponentBias) << 10) + 0x8003),
+ (uint16_t) (h + ((e + exponentBias) << 10) + 0x8004),
+ (uint16_t) (h + ((e + exponentBias) << 10) + 0x8005),
+ (uint16_t) (h + ((e + exponentBias) << 10) + 0x8006),
+ (uint16_t) (h + ((e + exponentBias) << 10) + 0x8007)
+ };
+ const psimd_u32x2 fp32 =
+ psimd_cast_f32x2_u32x2(fp16_alt_to_fp32x2_psimd(fp16));
+ // Input lanes 0-3 are checked against fp32.lo and lanes 4-7 against fp32.hi; the bits must equal the scalar reference exactly.
+ EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[0]), fp32.lo[0]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[0] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.lo[0] << ", " <<
+ "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[0]);
+ EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[1]), fp32.lo[1]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[1] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.lo[1] << ", " <<
+ "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[1]);
+ EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[2]), fp32.lo[2]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[2] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.lo[2] << ", " <<
+ "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[2]);
+ EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[3]), fp32.lo[3]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[3] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.lo[3] << ", " <<
+ "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[3]);
+ EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[4]), fp32.hi[0]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[4] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.hi[0] << ", " <<
+ "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[4]);
+ EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[5]), fp32.hi[1]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[5] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.hi[1] << ", " <<
+ "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[5]);
+ EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[6]), fp32.hi[2]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[6] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.hi[2] << ", " <<
+ "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[6]);
+ EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[7]), fp32.hi[3]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[7] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.hi[3] << ", " <<
+ "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[7]);
+ }
+ }
+}
+
+TEST(FP16_ALT_TO_FP32x2_PSIMD, positive_denormalized_values) { // Compares every lane with scalar fp16_alt_to_fp32_bits for inputs 0x0000..0x03FF.
+ for (uint16_t h = 0; h < 0x0400; h += 8) { // 8 consecutive bit patterns per vector; only the low 10 bits are ever set.
+ const psimd_u16 fp16 = {
+ (uint16_t) (h + 0),
+ (uint16_t) (h + 1),
+ (uint16_t) (h + 2),
+ (uint16_t) (h + 3),
+ (uint16_t) (h + 4),
+ (uint16_t) (h + 5),
+ (uint16_t) (h + 6),
+ (uint16_t) (h + 7)
+ };
+ const psimd_u32x2 fp32 =
+ psimd_cast_f32x2_u32x2(fp16_alt_to_fp32x2_psimd(fp16));
+ // Input lanes 0-3 are checked against fp32.lo and lanes 4-7 against fp32.hi; the bits must equal the scalar reference exactly.
+ EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[0]), fp32.lo[0]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[0] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.lo[0] << ", " <<
+ "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[0]);
+ EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[1]), fp32.lo[1]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[1] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.lo[1] << ", " <<
+ "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[1]);
+ EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[2]), fp32.lo[2]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[2] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.lo[2] << ", " <<
+ "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[2]);
+ EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[3]), fp32.lo[3]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[3] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.lo[3] << ", " <<
+ "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[3]);
+ EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[4]), fp32.hi[0]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[4] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.hi[0] << ", " <<
+ "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[4]);
+ EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[5]), fp32.hi[1]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[5] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.hi[1] << ", " <<
+ "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[5]);
+ EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[6]), fp32.hi[2]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[6] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.hi[2] << ", " <<
+ "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[6]);
+ EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[7]), fp32.hi[3]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[7] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.hi[3] << ", " <<
+ "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[7]);
+ }
+}
+
+TEST(FP16_ALT_TO_FP32x2_PSIMD, negative_denormalized_values) { // Compares every lane with scalar fp16_alt_to_fp32_bits for inputs 0x8000..0x83FF.
+ for (uint16_t h = 0; h < 0x0400; h += 8) { // 8 consecutive bit patterns per vector; bit 15 is set on every lane below.
+ const psimd_u16 fp16 = {
+ (uint16_t) (h + 0x8000),
+ (uint16_t) (h + 0x8001),
+ (uint16_t) (h + 0x8002),
+ (uint16_t) (h + 0x8003),
+ (uint16_t) (h + 0x8004),
+ (uint16_t) (h + 0x8005),
+ (uint16_t) (h + 0x8006),
+ (uint16_t) (h + 0x8007)
+ };
+ const psimd_u32x2 fp32 =
+ psimd_cast_f32x2_u32x2(fp16_alt_to_fp32x2_psimd(fp16));
+ // Input lanes 0-3 are checked against fp32.lo and lanes 4-7 against fp32.hi; the bits must equal the scalar reference exactly.
+ EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[0]), fp32.lo[0]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[0] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.lo[0] << ", " <<
+ "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[0]);
+ EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[1]), fp32.lo[1]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[1] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.lo[1] << ", " <<
+ "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[1]);
+ EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[2]), fp32.lo[2]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[2] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.lo[2] << ", " <<
+ "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[2]);
+ EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[3]), fp32.lo[3]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[3] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.lo[3] << ", " <<
+ "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[3]);
+ EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[4]), fp32.hi[0]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[4] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.hi[0] << ", " <<
+ "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[4]);
+ EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[5]), fp32.hi[1]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[5] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.hi[1] << ", " <<
+ "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[5]);
+ EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[6]), fp32.hi[2]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[6] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.hi[2] << ", " <<
+ "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[6]);
+ EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[7]), fp32.hi[3]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[7] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.hi[3] << ", " <<
+ "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[7]);
+ }
+}
diff --git a/test/ieee-to-fp32x2-psimd.cc b/test/ieee-to-fp32x2-psimd.cc
new file mode 100644
index 0000000..c02c1d8
--- /dev/null
+++ b/test/ieee-to-fp32x2-psimd.cc
@@ -0,0 +1,541 @@
+#include <gtest/gtest.h>
+
+#include <cstdint>
+
+#include <fp16.h>
+#include <fp16/psimd.h>
+
+
+TEST(FP16_IEEE_TO_FP32x2_PSIMD, infinity) { // Checks that 0x7C00/0xFC00 inputs convert to 0x7F800000/0xFF800000 in every lane.
+ const uint16_t positive_infinity_f16 = UINT16_C(0x7C00);
+ const uint16_t negative_infinity_f16 = UINT16_C(0xFC00);
+ // Expected FP32 bit patterns for the two inputs above.
+ const uint32_t positive_infinity_f32 = UINT32_C(0x7F800000);
+ const uint32_t negative_infinity_f32 = UINT32_C(0xFF800000);
+ // Mix both signs within each 4-lane half so .lo and .hi each see positive and negative inputs.
+ const psimd_u16 fp16 = {
+ positive_infinity_f16, negative_infinity_f16,
+ negative_infinity_f16, positive_infinity_f16,
+ positive_infinity_f16, positive_infinity_f16,
+ negative_infinity_f16, negative_infinity_f16
+ };
+ const psimd_u32x2 fp32 =
+ psimd_cast_f32x2_u32x2(fp16_ieee_to_fp32x2_psimd(fp16));
+ // Input lanes 0-3 are checked against fp32.lo and lanes 4-7 against fp32.hi, following the sign pattern of the initializer.
+ EXPECT_EQ(positive_infinity_f32, fp32.lo[0]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[0] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.lo[0] << ", " <<
+ "F32 = 0x" << std::setw(8) << positive_infinity_f32;
+ EXPECT_EQ(negative_infinity_f32, fp32.lo[1]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[1] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.lo[1] << ", " <<
+ "F32 = 0x" << std::setw(8) << negative_infinity_f32;
+ EXPECT_EQ(negative_infinity_f32, fp32.lo[2]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[2] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.lo[2] << ", " <<
+ "F32 = 0x" << std::setw(8) << negative_infinity_f32;
+ EXPECT_EQ(positive_infinity_f32, fp32.lo[3]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[3] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.lo[3] << ", " <<
+ "F32 = 0x" << std::setw(8) << positive_infinity_f32;
+ EXPECT_EQ(positive_infinity_f32, fp32.hi[0]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[4] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.hi[0] << ", " <<
+ "F32 = 0x" << std::setw(8) << positive_infinity_f32;
+ EXPECT_EQ(positive_infinity_f32, fp32.hi[1]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[5] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.hi[1] << ", " <<
+ "F32 = 0x" << std::setw(8) << positive_infinity_f32;
+ EXPECT_EQ(negative_infinity_f32, fp32.hi[2]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[6] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.hi[2] << ", " <<
+ "F32 = 0x" << std::setw(8) << negative_infinity_f32;
+ EXPECT_EQ(negative_infinity_f32, fp32.hi[3]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[7] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.hi[3] << ", " <<
+ "F32 = 0x" << std::setw(8) << negative_infinity_f32;
+}
+
+TEST(FP16_IEEE_TO_FP32x2_PSIMD, positive_nan) { // Checks that inputs 0x7C01..0x7FFF convert to FP32 values with clear sign, all-ones exponent and non-zero mantissa.
+ for (uint16_t h = 0; h < 0x0400; h += 8) { // 8 consecutive bit patterns per vector.
+ const psimd_u16 fp16 = {
+ (uint16_t) (h + 0x7C00 + (h == 0)) /* Avoid infinity */,
+ (uint16_t) (h + 0x7C01),
+ (uint16_t) (h + 0x7C02),
+ (uint16_t) (h + 0x7C03),
+ (uint16_t) (h + 0x7C04),
+ (uint16_t) (h + 0x7C05),
+ (uint16_t) (h + 0x7C06),
+ (uint16_t) (h + 0x7C07),
+ };
+ const psimd_u32x2 fp32 =
+ psimd_cast_f32x2_u32x2(fp16_ieee_to_fp32x2_psimd(fp16));
+ // Zero is written as UINT32_C(0) below so both sides of each comparison are unsigned (avoids a signed/unsigned comparison inside gtest).
+ /* Check sign */
+ EXPECT_EQ(fp32.lo[0] & UINT32_C(0x80000000), UINT32_C(0)) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[0] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.lo[0];
+ EXPECT_EQ(fp32.lo[1] & UINT32_C(0x80000000), UINT32_C(0)) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[1] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.lo[1];
+ EXPECT_EQ(fp32.lo[2] & UINT32_C(0x80000000), UINT32_C(0)) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[2] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.lo[2];
+ EXPECT_EQ(fp32.lo[3] & UINT32_C(0x80000000), UINT32_C(0)) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[3] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.lo[3];
+ EXPECT_EQ(fp32.hi[0] & UINT32_C(0x80000000), UINT32_C(0)) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[4] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.hi[0];
+ EXPECT_EQ(fp32.hi[1] & UINT32_C(0x80000000), UINT32_C(0)) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[5] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.hi[1];
+ EXPECT_EQ(fp32.hi[2] & UINT32_C(0x80000000), UINT32_C(0)) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[6] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.hi[2];
+ EXPECT_EQ(fp32.hi[3] & UINT32_C(0x80000000), UINT32_C(0)) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[7] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.hi[3];
+
+ /* Check exponent */
+ EXPECT_EQ(fp32.lo[0] & UINT32_C(0x7F800000), UINT32_C(0x7F800000)) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[0] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.lo[0];
+ EXPECT_EQ(fp32.lo[1] & UINT32_C(0x7F800000), UINT32_C(0x7F800000)) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[1] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.lo[1];
+ EXPECT_EQ(fp32.lo[2] & UINT32_C(0x7F800000), UINT32_C(0x7F800000)) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[2] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.lo[2];
+ EXPECT_EQ(fp32.lo[3] & UINT32_C(0x7F800000), UINT32_C(0x7F800000)) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[3] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.lo[3];
+ EXPECT_EQ(fp32.hi[0] & UINT32_C(0x7F800000), UINT32_C(0x7F800000)) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[4] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.hi[0];
+ EXPECT_EQ(fp32.hi[1] & UINT32_C(0x7F800000), UINT32_C(0x7F800000)) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[5] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.hi[1];
+ EXPECT_EQ(fp32.hi[2] & UINT32_C(0x7F800000), UINT32_C(0x7F800000)) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[6] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.hi[2];
+ EXPECT_EQ(fp32.hi[3] & UINT32_C(0x7F800000), UINT32_C(0x7F800000)) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[7] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.hi[3];
+
+ /* Check mantissa */
+ EXPECT_NE(fp32.lo[0] & UINT32_C(0x007FFFFF), UINT32_C(0)) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[0] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.lo[0];
+ EXPECT_NE(fp32.lo[1] & UINT32_C(0x007FFFFF), UINT32_C(0)) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[1] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.lo[1];
+ EXPECT_NE(fp32.lo[2] & UINT32_C(0x007FFFFF), UINT32_C(0)) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[2] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.lo[2];
+ EXPECT_NE(fp32.lo[3] & UINT32_C(0x007FFFFF), UINT32_C(0)) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[3] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.lo[3];
+ EXPECT_NE(fp32.hi[0] & UINT32_C(0x007FFFFF), UINT32_C(0)) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[4] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.hi[0];
+ EXPECT_NE(fp32.hi[1] & UINT32_C(0x007FFFFF), UINT32_C(0)) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[5] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.hi[1];
+ EXPECT_NE(fp32.hi[2] & UINT32_C(0x007FFFFF), UINT32_C(0)) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[6] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.hi[2];
+ EXPECT_NE(fp32.hi[3] & UINT32_C(0x007FFFFF), UINT32_C(0)) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[7] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.hi[3];
+ }
+}
+
+TEST(FP16_IEEE_TO_FP32x2_PSIMD, negative_nan) { /* Feeds the inputs 0xFC01..0xFFFF, eight lanes per call, and requires every output to have the sign bit set, an all-ones exponent field and a non-zero mantissa. */
+ for (uint16_t h = 0; h < 0x0400; h += 8) { /* each iteration covers 8 consecutive 16-bit patterns */
+ const psimd_u16 fp16 = {
+ (uint16_t) (h + 0xFC00 + (h == 0)) /* Avoid infinity: in the first batch 0xFC00 is bumped to 0xFC01, so lanes 0 and 1 both hold 0xFC01 */,
+ (uint16_t) (h + 0xFC01),
+ (uint16_t) (h + 0xFC02),
+ (uint16_t) (h + 0xFC03),
+ (uint16_t) (h + 0xFC04),
+ (uint16_t) (h + 0xFC05),
+ (uint16_t) (h + 0xFC06),
+ (uint16_t) (h + 0xFC07),
+ };
+ const psimd_u32x2 fp32 =
+ psimd_cast_f32x2_u32x2(fp16_ieee_to_fp32x2_psimd(fp16)); /* converted lanes viewed as 32-bit words: fp16[0..3] are checked against fp32.lo[0..3], fp16[4..7] against fp32.hi[0..3] */
+
+ /* Check sign: bit 31 (mask 0x80000000) must be set in every lane; input and output are printed in hex on failure */
+ EXPECT_EQ(fp32.lo[0] & UINT32_C(0x80000000), UINT32_C(0x80000000)) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[0] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.lo[0];
+ EXPECT_EQ(fp32.lo[1] & UINT32_C(0x80000000), UINT32_C(0x80000000)) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[1] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.lo[1];
+ EXPECT_EQ(fp32.lo[2] & UINT32_C(0x80000000), UINT32_C(0x80000000)) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[2] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.lo[2];
+ EXPECT_EQ(fp32.lo[3] & UINT32_C(0x80000000), UINT32_C(0x80000000)) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[3] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.lo[3];
+ EXPECT_EQ(fp32.hi[0] & UINT32_C(0x80000000), UINT32_C(0x80000000)) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[4] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.hi[0];
+ EXPECT_EQ(fp32.hi[1] & UINT32_C(0x80000000), UINT32_C(0x80000000)) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[5] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.hi[1];
+ EXPECT_EQ(fp32.hi[2] & UINT32_C(0x80000000), UINT32_C(0x80000000)) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[6] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.hi[2];
+ EXPECT_EQ(fp32.hi[3] & UINT32_C(0x80000000), UINT32_C(0x80000000)) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[7] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.hi[3];
+
+ /* Check exponent: all bits selected by mask 0x7F800000 must be set in every lane */
+ EXPECT_EQ(fp32.lo[0] & UINT32_C(0x7F800000), UINT32_C(0x7F800000)) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[0] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.lo[0];
+ EXPECT_EQ(fp32.lo[1] & UINT32_C(0x7F800000), UINT32_C(0x7F800000)) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[1] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.lo[1];
+ EXPECT_EQ(fp32.lo[2] & UINT32_C(0x7F800000), UINT32_C(0x7F800000)) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[2] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.lo[2];
+ EXPECT_EQ(fp32.lo[3] & UINT32_C(0x7F800000), UINT32_C(0x7F800000)) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[3] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.lo[3];
+ EXPECT_EQ(fp32.hi[0] & UINT32_C(0x7F800000), UINT32_C(0x7F800000)) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[4] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.hi[0];
+ EXPECT_EQ(fp32.hi[1] & UINT32_C(0x7F800000), UINT32_C(0x7F800000)) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[5] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.hi[1];
+ EXPECT_EQ(fp32.hi[2] & UINT32_C(0x7F800000), UINT32_C(0x7F800000)) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[6] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.hi[2];
+ EXPECT_EQ(fp32.hi[3] & UINT32_C(0x7F800000), UINT32_C(0x7F800000)) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[7] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.hi[3];
+
+ /* Check mantissa: the low 23 bits (mask 0x007FFFFF) must not all be zero in any lane */
+ EXPECT_NE(fp32.lo[0] & UINT32_C(0x007FFFFF), 0) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[0] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.lo[0];
+ EXPECT_NE(fp32.lo[1] & UINT32_C(0x007FFFFF), 0) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[1] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.lo[1];
+ EXPECT_NE(fp32.lo[2] & UINT32_C(0x007FFFFF), 0) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[2] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.lo[2];
+ EXPECT_NE(fp32.lo[3] & UINT32_C(0x007FFFFF), 0) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[3] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.lo[3];
+ EXPECT_NE(fp32.hi[0] & UINT32_C(0x007FFFFF), 0) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[4] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.hi[0];
+ EXPECT_NE(fp32.hi[1] & UINT32_C(0x007FFFFF), 0) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[5] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.hi[1];
+ EXPECT_NE(fp32.hi[2] & UINT32_C(0x007FFFFF), 0) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[6] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.hi[2];
+ EXPECT_NE(fp32.hi[3] & UINT32_C(0x007FFFFF), 0) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[7] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.hi[3];
+ }
+}
+
+TEST(FP16_IEEE_TO_FP32x2_PSIMD, positive_normalized_values) { /* For every exponent e in [-14, 15] and every 10-bit pattern h, the 8-lane conversion must produce exactly the bits returned by fp16_ieee_to_fp32_bits for the same input. */
+ const uint32_t exponentBias = 15; /* added to e before the sum is shifted left by 10 bits */
+ for (int32_t e = -14; e <= 15; e++) { /* e + exponentBias therefore runs from 1 to 30 */
+ for (uint16_t h = 0; h < 0x0400; h += 8) { /* h walks 0..0x3FF, 8 consecutive values per batch */
+ const psimd_u16 fp16 = {
+ (uint16_t) (h + ((e + exponentBias) << 10) + 0),
+ (uint16_t) (h + ((e + exponentBias) << 10) + 1),
+ (uint16_t) (h + ((e + exponentBias) << 10) + 2),
+ (uint16_t) (h + ((e + exponentBias) << 10) + 3),
+ (uint16_t) (h + ((e + exponentBias) << 10) + 4),
+ (uint16_t) (h + ((e + exponentBias) << 10) + 5),
+ (uint16_t) (h + ((e + exponentBias) << 10) + 6),
+ (uint16_t) (h + ((e + exponentBias) << 10) + 7)
+ };
+ const psimd_u32x2 fp32 =
+ psimd_cast_f32x2_u32x2(fp16_ieee_to_fp32x2_psimd(fp16)); /* converted lanes viewed as 32-bit words: fp16[0..3] are compared with fp32.lo[0..3], fp16[4..7] with fp32.hi[0..3] */
+
+ EXPECT_EQ(fp16_ieee_to_fp32_bits(fp16[0]), fp32.lo[0]) << /* first argument is the scalar result, second the vector lane; on failure the input and both results are printed in hex */
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[0] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.lo[0] << ", " <<
+ "F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(fp16[0]);
+ EXPECT_EQ(fp16_ieee_to_fp32_bits(fp16[1]), fp32.lo[1]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[1] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.lo[1] << ", " <<
+ "F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(fp16[1]);
+ EXPECT_EQ(fp16_ieee_to_fp32_bits(fp16[2]), fp32.lo[2]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[2] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.lo[2] << ", " <<
+ "F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(fp16[2]);
+ EXPECT_EQ(fp16_ieee_to_fp32_bits(fp16[3]), fp32.lo[3]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[3] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.lo[3] << ", " <<
+ "F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(fp16[3]);
+ EXPECT_EQ(fp16_ieee_to_fp32_bits(fp16[4]), fp32.hi[0]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[4] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.hi[0] << ", " <<
+ "F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(fp16[4]);
+ EXPECT_EQ(fp16_ieee_to_fp32_bits(fp16[5]), fp32.hi[1]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[5] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.hi[1] << ", " <<
+ "F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(fp16[5]);
+ EXPECT_EQ(fp16_ieee_to_fp32_bits(fp16[6]), fp32.hi[2]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[6] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.hi[2] << ", " <<
+ "F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(fp16[6]);
+ EXPECT_EQ(fp16_ieee_to_fp32_bits(fp16[7]), fp32.hi[3]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[7] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.hi[3] << ", " <<
+ "F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(fp16[7]);
+ }
+ }
+}
+
+TEST(FP16_IEEE_TO_FP32x2_PSIMD, negative_normalized_values) { /* For every exponent e in [-14, 15] and every 10-bit pattern h, with 0x8000 added to each input, the 8-lane conversion must produce exactly the bits returned by fp16_ieee_to_fp32_bits. */
+ const uint32_t exponentBias = 15; /* added to e before the sum is shifted left by 10 bits */
+ for (int32_t e = -14; e <= 15; e++) { /* e + exponentBias therefore runs from 1 to 30 */
+ for (uint16_t h = 0; h < 0x0400; h += 8) { /* h walks 0..0x3FF, 8 consecutive values per batch */
+ const psimd_u16 fp16 = {
+ (uint16_t) (h + ((e + exponentBias) << 10) + 0x8000), /* 0x8000 sets bit 15 of the input; the low digit selects the lane offset 0..7 */
+ (uint16_t) (h + ((e + exponentBias) << 10) + 0x8001),
+ (uint16_t) (h + ((e + exponentBias) << 10) + 0x8002),
+ (uint16_t) (h + ((e + exponentBias) << 10) + 0x8003),
+ (uint16_t) (h + ((e + exponentBias) << 10) + 0x8004),
+ (uint16_t) (h + ((e + exponentBias) << 10) + 0x8005),
+ (uint16_t) (h + ((e + exponentBias) << 10) + 0x8006),
+ (uint16_t) (h + ((e + exponentBias) << 10) + 0x8007)
+ };
+ const psimd_u32x2 fp32 =
+ psimd_cast_f32x2_u32x2(fp16_ieee_to_fp32x2_psimd(fp16)); /* converted lanes viewed as 32-bit words: fp16[0..3] are compared with fp32.lo[0..3], fp16[4..7] with fp32.hi[0..3] */
+
+ EXPECT_EQ(fp16_ieee_to_fp32_bits(fp16[0]), fp32.lo[0]) << /* first argument is the scalar result, second the vector lane; on failure the input and both results are printed in hex */
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[0] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.lo[0] << ", " <<
+ "F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(fp16[0]);
+ EXPECT_EQ(fp16_ieee_to_fp32_bits(fp16[1]), fp32.lo[1]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[1] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.lo[1] << ", " <<
+ "F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(fp16[1]);
+ EXPECT_EQ(fp16_ieee_to_fp32_bits(fp16[2]), fp32.lo[2]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[2] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.lo[2] << ", " <<
+ "F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(fp16[2]);
+ EXPECT_EQ(fp16_ieee_to_fp32_bits(fp16[3]), fp32.lo[3]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[3] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.lo[3] << ", " <<
+ "F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(fp16[3]);
+ EXPECT_EQ(fp16_ieee_to_fp32_bits(fp16[4]), fp32.hi[0]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[4] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.hi[0] << ", " <<
+ "F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(fp16[4]);
+ EXPECT_EQ(fp16_ieee_to_fp32_bits(fp16[5]), fp32.hi[1]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[5] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.hi[1] << ", " <<
+ "F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(fp16[5]);
+ EXPECT_EQ(fp16_ieee_to_fp32_bits(fp16[6]), fp32.hi[2]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[6] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.hi[2] << ", " <<
+ "F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(fp16[6]);
+ EXPECT_EQ(fp16_ieee_to_fp32_bits(fp16[7]), fp32.hi[3]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[7] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.hi[3] << ", " <<
+ "F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(fp16[7]);
+ }
+ }
+}
+
+TEST(FP16_IEEE_TO_FP32x2_PSIMD, positive_denormalized_values) { /* Feeds the inputs 0x0000..0x03FF (bits 10 and above all zero), eight lanes per call, and requires each output lane to match fp16_ieee_to_fp32_bits bit for bit. */
+ for (uint16_t h = 0; h < 0x0400; h += 8) { /* each iteration covers 8 consecutive 16-bit patterns; the first batch starts at 0x0000 */
+ const psimd_u16 fp16 = {
+ (uint16_t) (h + 0),
+ (uint16_t) (h + 1),
+ (uint16_t) (h + 2),
+ (uint16_t) (h + 3),
+ (uint16_t) (h + 4),
+ (uint16_t) (h + 5),
+ (uint16_t) (h + 6),
+ (uint16_t) (h + 7)
+ };
+ const psimd_u32x2 fp32 =
+ psimd_cast_f32x2_u32x2(fp16_ieee_to_fp32x2_psimd(fp16)); /* converted lanes viewed as 32-bit words: fp16[0..3] are compared with fp32.lo[0..3], fp16[4..7] with fp32.hi[0..3] */
+
+ EXPECT_EQ(fp16_ieee_to_fp32_bits(fp16[0]), fp32.lo[0]) << /* first argument is the scalar result, second the vector lane; on failure the input and both results are printed in hex */
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[0] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.lo[0] << ", " <<
+ "F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(fp16[0]);
+ EXPECT_EQ(fp16_ieee_to_fp32_bits(fp16[1]), fp32.lo[1]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[1] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.lo[1] << ", " <<
+ "F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(fp16[1]);
+ EXPECT_EQ(fp16_ieee_to_fp32_bits(fp16[2]), fp32.lo[2]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[2] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.lo[2] << ", " <<
+ "F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(fp16[2]);
+ EXPECT_EQ(fp16_ieee_to_fp32_bits(fp16[3]), fp32.lo[3]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[3] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.lo[3] << ", " <<
+ "F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(fp16[3]);
+ EXPECT_EQ(fp16_ieee_to_fp32_bits(fp16[4]), fp32.hi[0]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[4] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.hi[0] << ", " <<
+ "F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(fp16[4]);
+ EXPECT_EQ(fp16_ieee_to_fp32_bits(fp16[5]), fp32.hi[1]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[5] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.hi[1] << ", " <<
+ "F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(fp16[5]);
+ EXPECT_EQ(fp16_ieee_to_fp32_bits(fp16[6]), fp32.hi[2]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[6] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.hi[2] << ", " <<
+ "F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(fp16[6]);
+ EXPECT_EQ(fp16_ieee_to_fp32_bits(fp16[7]), fp32.hi[3]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[7] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.hi[3] << ", " <<
+ "F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(fp16[7]);
+ }
+}
+
+TEST(FP16_IEEE_TO_FP32x2_PSIMD, negative_denormalized_values) { /* Feeds the inputs 0x8000..0x83FF (bit 15 set, bits 10..14 zero), eight lanes per call, and requires each output lane to match fp16_ieee_to_fp32_bits bit for bit. */
+ for (uint16_t h = 0; h < 0x0400; h += 8) { /* each iteration covers 8 consecutive 16-bit patterns; the first batch starts at 0x8000 */
+ const psimd_u16 fp16 = {
+ (uint16_t) (h + 0x8000),
+ (uint16_t) (h + 0x8001),
+ (uint16_t) (h + 0x8002),
+ (uint16_t) (h + 0x8003),
+ (uint16_t) (h + 0x8004),
+ (uint16_t) (h + 0x8005),
+ (uint16_t) (h + 0x8006),
+ (uint16_t) (h + 0x8007)
+ };
+ const psimd_u32x2 fp32 =
+ psimd_cast_f32x2_u32x2(fp16_ieee_to_fp32x2_psimd(fp16)); /* converted lanes viewed as 32-bit words: fp16[0..3] are compared with fp32.lo[0..3], fp16[4..7] with fp32.hi[0..3] */
+
+ EXPECT_EQ(fp16_ieee_to_fp32_bits(fp16[0]), fp32.lo[0]) << /* first argument is the scalar result, second the vector lane; on failure the input and both results are printed in hex */
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[0] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.lo[0] << ", " <<
+ "F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(fp16[0]);
+ EXPECT_EQ(fp16_ieee_to_fp32_bits(fp16[1]), fp32.lo[1]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[1] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.lo[1] << ", " <<
+ "F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(fp16[1]);
+ EXPECT_EQ(fp16_ieee_to_fp32_bits(fp16[2]), fp32.lo[2]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[2] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.lo[2] << ", " <<
+ "F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(fp16[2]);
+ EXPECT_EQ(fp16_ieee_to_fp32_bits(fp16[3]), fp32.lo[3]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[3] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.lo[3] << ", " <<
+ "F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(fp16[3]);
+ EXPECT_EQ(fp16_ieee_to_fp32_bits(fp16[4]), fp32.hi[0]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[4] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.hi[0] << ", " <<
+ "F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(fp16[4]);
+ EXPECT_EQ(fp16_ieee_to_fp32_bits(fp16[5]), fp32.hi[1]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[5] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.hi[1] << ", " <<
+ "F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(fp16[5]);
+ EXPECT_EQ(fp16_ieee_to_fp32_bits(fp16[6]), fp32.hi[2]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[6] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.hi[2] << ", " <<
+ "F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(fp16[6]);
+ EXPECT_EQ(fp16_ieee_to_fp32_bits(fp16[7]), fp32.hi[3]) <<
+ std::hex << std::uppercase << std::setfill('0') <<
+ "F16 = 0x" << std::setw(4) << fp16[7] << ", " <<
+ "F32(F16) = 0x" << std::setw(8) << fp32.hi[3] << ", " <<
+ "F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(fp16[7]);
+ }
+}