Snap for 6203617 from 4039a7ffa4bbee623e1e616efef9a387b2a8b47d to rvc-release

Change-Id: Ic579dad3e8d3e385b48cae63bcb7f0f1f3219a6e
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..73b2998
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,19 @@
+# Ninja files
+build.ninja
+
+# Build objects and artifacts
+deps/
+build/
+bin/
+lib/
+*.pyc
+*.pyo
+
+# System files
+.DS_Store
+.DS_Store?
+._*
+.Spotlight-V100
+.Trashes
+ehthumbs.db
+Thumbs.db
diff --git a/Android.bp b/Android.bp
new file mode 100644
index 0000000..e58f54f
--- /dev/null
+++ b/Android.bp
@@ -0,0 +1,113 @@
+// Copyright (C) 2020 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+cc_library_headers {
+    name: "fp16_headers",
+    export_include_dirs: ["include"],
+    vendor_available: true,
+    sdk_version: "current",
+}
+
+cc_defaults {
+    name: "fp16_tests_default",
+    sdk_version: "current",
+    srcs: [
+        "test/tables.cc",
+    ],
+    header_libs: [
+        "fp16_headers",
+    ],
+    stl: "libc++_static",
+    static_libs: [
+        "libgmock_ndk",
+    ]
+}
+
+cc_test {
+    name: "Fp16AltFromFp32ValueTests",
+    defaults: ["fp16_tests_default"],
+    srcs: [
+        "test/alt-from-fp32-value.cc",
+    ],
+    test_suites: [
+        "general-tests",
+    ],
+}
+
+cc_test {
+    name: "Fp16AltToFp32BitsTests",
+    defaults: ["fp16_tests_default"],
+    srcs: [
+        "test/alt-to-fp32-bits.cc",
+    ],
+    test_suites: [
+        "general-tests",
+    ],
+}
+
+cc_test {
+    name: "Fp16AltToFp32ValueTests",
+    defaults: ["fp16_tests_default"],
+    srcs: [
+        "test/alt-to-fp32-value.cc",
+    ],
+    test_suites: [
+        "general-tests",
+    ],
+}
+
+cc_test {
+    name: "Fp16BitcastsTests",
+    defaults: ["fp16_tests_default"],
+    srcs: [
+        "test/bitcasts.cc",
+    ],
+    test_suites: [
+        "general-tests",
+    ],
+}
+
+cc_test {
+    name: "Fp16IEEEFromFp32ValueTests",
+    defaults: ["fp16_tests_default"],
+    srcs: [
+        "test/ieee-from-fp32-value.cc",
+    ],
+    test_suites: [
+        "general-tests",
+    ],
+}
+
+cc_test {
+    name: "Fp16IEEEToFp32BitsTests",
+    defaults: ["fp16_tests_default"],
+    srcs: [
+        "test/ieee-to-fp32-bits.cc",
+    ],
+    test_suites: [
+        "general-tests",
+    ],
+}
+
+cc_test {
+    name: "Fp16IEEEToFp32ValueTests",
+    defaults: ["fp16_tests_default"],
+    srcs: [
+        "test/ieee-to-fp32-value.cc",
+    ],
+    test_suites: [
+        "general-tests",
+    ],
+}
+
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..e0d87f8
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,181 @@
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.12 FATAL_ERROR)
+
+INCLUDE(GNUInstallDirs)
+
+# ---[ Project
+PROJECT(FP16 C CXX)
+
+# ---[ Options.
+OPTION(FP16_BUILD_TESTS "Build FP16 unit tests" ON)
+OPTION(FP16_BUILD_BENCHMARKS "Build FP16 micro-benchmarks" ON)
+
+# ---[ CMake options
+IF(FP16_BUILD_TESTS)
+  ENABLE_TESTING()
+ENDIF()
+
+# Build `target` as C++11 (GNU extensions allowed); a FUNCTION keeps `target` in its own variable scope.
+FUNCTION(FP16_TARGET_ENABLE_CXX11 target)
+  IF(CMAKE_VERSION VERSION_LESS "3.1")  # CXX_STANDARD target property only exists from CMake 3.1
+    IF(NOT MSVC)
+      TARGET_COMPILE_OPTIONS(${target} PRIVATE -std=c++11)
+    ENDIF()
+  ELSE()
+    SET_TARGET_PROPERTIES(${target} PROPERTIES
+      CXX_STANDARD 11
+      CXX_STANDARD_REQUIRED YES CXX_EXTENSIONS YES)
+  ENDIF()
+ENDFUNCTION()
+
+# ---[ Download deps
+SET(CONFU_DEPENDENCIES_SOURCE_DIR ${CMAKE_SOURCE_DIR}/deps
+  CACHE PATH "Confu-style dependencies source directory")
+SET(CONFU_DEPENDENCIES_BINARY_DIR ${CMAKE_BINARY_DIR}/deps
+  CACHE PATH "Confu-style dependencies binary directory")
+
+IF(NOT DEFINED PSIMD_SOURCE_DIR)
+  MESSAGE(STATUS "Downloading PSimd to ${CONFU_DEPENDENCIES_SOURCE_DIR}/psimd (define PSIMD_SOURCE_DIR to avoid it)")
+  CONFIGURE_FILE(cmake/DownloadPSimd.cmake "${CONFU_DEPENDENCIES_BINARY_DIR}/psimd-download/CMakeLists.txt")
+  EXECUTE_PROCESS(COMMAND "${CMAKE_COMMAND}" -G "${CMAKE_GENERATOR}" .
+    WORKING_DIRECTORY "${CONFU_DEPENDENCIES_BINARY_DIR}/psimd-download")
+  EXECUTE_PROCESS(COMMAND "${CMAKE_COMMAND}" --build .
+    WORKING_DIRECTORY "${CONFU_DEPENDENCIES_BINARY_DIR}/psimd-download")
+  SET(PSIMD_SOURCE_DIR "${CONFU_DEPENDENCIES_SOURCE_DIR}/psimd" CACHE STRING "PSimd source directory")
+ENDIF()
+
+IF(FP16_BUILD_TESTS AND NOT DEFINED GOOGLETEST_SOURCE_DIR)
+  MESSAGE(STATUS "Downloading Google Test to ${CONFU_DEPENDENCIES_SOURCE_DIR}/googletest (define GOOGLETEST_SOURCE_DIR to avoid it)")
+  CONFIGURE_FILE(cmake/DownloadGoogleTest.cmake "${CONFU_DEPENDENCIES_BINARY_DIR}/googletest-download/CMakeLists.txt")
+  EXECUTE_PROCESS(COMMAND "${CMAKE_COMMAND}" -G "${CMAKE_GENERATOR}" .
+    WORKING_DIRECTORY "${CONFU_DEPENDENCIES_BINARY_DIR}/googletest-download")
+  EXECUTE_PROCESS(COMMAND "${CMAKE_COMMAND}" --build .
+    WORKING_DIRECTORY "${CONFU_DEPENDENCIES_BINARY_DIR}/googletest-download")
+  SET(GOOGLETEST_SOURCE_DIR "${CONFU_DEPENDENCIES_SOURCE_DIR}/googletest" CACHE STRING "Google Test source directory")
+ENDIF()
+
+IF(FP16_BUILD_BENCHMARKS AND NOT DEFINED GOOGLEBENCHMARK_SOURCE_DIR)
+  MESSAGE(STATUS "Downloading Google Benchmark to ${CONFU_DEPENDENCIES_SOURCE_DIR}/googlebenchmark (define GOOGLEBENCHMARK_SOURCE_DIR to avoid it)")
+  CONFIGURE_FILE(cmake/DownloadGoogleBenchmark.cmake "${CONFU_DEPENDENCIES_BINARY_DIR}/googlebenchmark-download/CMakeLists.txt")
+  EXECUTE_PROCESS(COMMAND "${CMAKE_COMMAND}" -G "${CMAKE_GENERATOR}" .
+    WORKING_DIRECTORY "${CONFU_DEPENDENCIES_BINARY_DIR}/googlebenchmark-download")
+  EXECUTE_PROCESS(COMMAND "${CMAKE_COMMAND}" --build .
+    WORKING_DIRECTORY "${CONFU_DEPENDENCIES_BINARY_DIR}/googlebenchmark-download")
+  SET(GOOGLEBENCHMARK_SOURCE_DIR "${CONFU_DEPENDENCIES_SOURCE_DIR}/googlebenchmark" CACHE STRING "Google Benchmark source directory")
+ENDIF()
+
+# ---[ FP16 library
+IF(${CMAKE_VERSION} VERSION_LESS "3.0")
+  ADD_LIBRARY(fp16 STATIC
+    include/fp16.h
+    include/fp16/fp16.h
+    include/fp16/bitcasts.h
+    include/fp16/psimd.h)
+  SET_TARGET_PROPERTIES(fp16 PROPERTIES LINKER_LANGUAGE C)
+ELSE()
+  ADD_LIBRARY(fp16 INTERFACE)
+ENDIF()
+TARGET_INCLUDE_DIRECTORIES(fp16 INTERFACE
+    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
+    $<INSTALL_INTERFACE:include>)
+
+INSTALL(FILES include/fp16.h
+  DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
+INSTALL(FILES
+    include/fp16/bitcasts.h
+    include/fp16/fp16.h
+    include/fp16/psimd.h
+    include/fp16/__init__.py
+    include/fp16/avx.py
+    include/fp16/avx2.py
+  DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/fp16)
+
+# ---[ Configure psimd
+IF(NOT TARGET psimd)
+  ADD_SUBDIRECTORY(
+    "${PSIMD_SOURCE_DIR}"
+    "${CONFU_DEPENDENCIES_BINARY_DIR}/psimd")
+ENDIF()
+
+IF(FP16_BUILD_TESTS)
+  # ---[ Build google test (skipped when the gtest target already exists, e.g. from a parent project)
+  IF(NOT TARGET gtest)
+    SET(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
+    ADD_SUBDIRECTORY(
+      "${GOOGLETEST_SOURCE_DIR}"
+      "${CONFU_DEPENDENCIES_BINARY_DIR}/googletest")
+  ENDIF()
+
+  # ---[ Build FP16 unit tests: one executable per test source, registered with CTest by target name
+  ADD_EXECUTABLE(ieee-to-fp32-bits-test test/ieee-to-fp32-bits.cc test/tables.cc)
+  TARGET_INCLUDE_DIRECTORIES(ieee-to-fp32-bits-test PRIVATE test)
+  TARGET_LINK_LIBRARIES(ieee-to-fp32-bits-test PRIVATE fp16 gtest gtest_main)
+  ADD_TEST(NAME ieee-to-fp32-bits COMMAND ieee-to-fp32-bits-test)
+
+  ADD_EXECUTABLE(ieee-to-fp32-value-test test/ieee-to-fp32-value.cc test/tables.cc)
+  TARGET_INCLUDE_DIRECTORIES(ieee-to-fp32-value-test PRIVATE test)
+  TARGET_LINK_LIBRARIES(ieee-to-fp32-value-test PRIVATE fp16 gtest gtest_main)
+  ADD_TEST(NAME ieee-to-fp32-value COMMAND ieee-to-fp32-value-test)
+
+  ADD_EXECUTABLE(ieee-from-fp32-value-test test/ieee-from-fp32-value.cc test/tables.cc)
+  TARGET_INCLUDE_DIRECTORIES(ieee-from-fp32-value-test PRIVATE test)
+  TARGET_LINK_LIBRARIES(ieee-from-fp32-value-test PRIVATE fp16 gtest gtest_main)
+  ADD_TEST(NAME ieee-from-fp32-value COMMAND ieee-from-fp32-value-test)
+
+  ADD_EXECUTABLE(alt-to-fp32-bits-test test/alt-to-fp32-bits.cc test/tables.cc)
+  TARGET_INCLUDE_DIRECTORIES(alt-to-fp32-bits-test PRIVATE test)
+  TARGET_LINK_LIBRARIES(alt-to-fp32-bits-test PRIVATE fp16 gtest gtest_main)
+  ADD_TEST(NAME alt-to-fp32-bits COMMAND alt-to-fp32-bits-test)
+
+  ADD_EXECUTABLE(alt-to-fp32-value-test test/alt-to-fp32-value.cc test/tables.cc)
+  TARGET_INCLUDE_DIRECTORIES(alt-to-fp32-value-test PRIVATE test)
+  TARGET_LINK_LIBRARIES(alt-to-fp32-value-test PRIVATE fp16 gtest gtest_main)
+  ADD_TEST(NAME alt-to-fp32-value COMMAND alt-to-fp32-value-test)
+
+  ADD_EXECUTABLE(alt-from-fp32-value-test test/alt-from-fp32-value.cc test/tables.cc)
+  TARGET_INCLUDE_DIRECTORIES(alt-from-fp32-value-test PRIVATE test)
+  TARGET_LINK_LIBRARIES(alt-from-fp32-value-test PRIVATE fp16 gtest gtest_main)
+  ADD_TEST(NAME alt-from-fp32-value COMMAND alt-from-fp32-value-test)
+
+  ADD_EXECUTABLE(bitcasts-test test/bitcasts.cc)
+  TARGET_LINK_LIBRARIES(bitcasts-test PRIVATE fp16 gtest gtest_main)
+  ADD_TEST(NAME bitcasts COMMAND bitcasts-test)
+ENDIF()
+
+IF(FP16_BUILD_BENCHMARKS)
+  # ---[ Build google benchmark (skipped when the benchmark target already exists)
+  IF(NOT TARGET benchmark)
+    SET(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "")
+    ADD_SUBDIRECTORY(
+      "${GOOGLEBENCHMARK_SOURCE_DIR}"
+      "${CONFU_DEPENDENCIES_BINARY_DIR}/googlebenchmark")
+  ENDIF()
+
+  # ---[ Build FP16 benchmarks; the IEEE ones define FP16_COMPARATIVE_BENCHMARKS and add the project root to their include path
+  ADD_EXECUTABLE(ieee-element-bench bench/ieee-element.cc)
+  TARGET_COMPILE_DEFINITIONS(ieee-element-bench PRIVATE FP16_COMPARATIVE_BENCHMARKS=1)
+  TARGET_INCLUDE_DIRECTORIES(ieee-element-bench PRIVATE ${PROJECT_SOURCE_DIR})
+  TARGET_LINK_LIBRARIES(ieee-element-bench PRIVATE fp16 psimd benchmark)
+
+  ADD_EXECUTABLE(alt-element-bench bench/alt-element.cc)
+  TARGET_LINK_LIBRARIES(alt-element-bench PRIVATE fp16 psimd benchmark)
+
+  ADD_EXECUTABLE(from-ieee-array-bench bench/from-ieee-array.cc)
+  FP16_TARGET_ENABLE_CXX11(from-ieee-array-bench)
+  TARGET_COMPILE_DEFINITIONS(from-ieee-array-bench PRIVATE FP16_COMPARATIVE_BENCHMARKS=1)
+  TARGET_INCLUDE_DIRECTORIES(from-ieee-array-bench PRIVATE ${PROJECT_SOURCE_DIR})
+  TARGET_LINK_LIBRARIES(from-ieee-array-bench PRIVATE fp16 psimd benchmark)
+
+  ADD_EXECUTABLE(from-alt-array-bench bench/from-alt-array.cc)
+  FP16_TARGET_ENABLE_CXX11(from-alt-array-bench)
+  TARGET_LINK_LIBRARIES(from-alt-array-bench PRIVATE fp16 psimd benchmark)
+
+  ADD_EXECUTABLE(to-ieee-array-bench bench/to-ieee-array.cc)
+  FP16_TARGET_ENABLE_CXX11(to-ieee-array-bench)
+  TARGET_COMPILE_DEFINITIONS(to-ieee-array-bench PRIVATE FP16_COMPARATIVE_BENCHMARKS=1)
+  TARGET_INCLUDE_DIRECTORIES(to-ieee-array-bench PRIVATE ${PROJECT_SOURCE_DIR})
+  TARGET_LINK_LIBRARIES(to-ieee-array-bench PRIVATE fp16 psimd benchmark)
+
+  ADD_EXECUTABLE(to-alt-array-bench bench/to-alt-array.cc)
+  FP16_TARGET_ENABLE_CXX11(to-alt-array-bench)
+  TARGET_LINK_LIBRARIES(to-alt-array-bench PRIVATE fp16 psimd benchmark)
+ENDIF()
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..eabec6c
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,11 @@
+The MIT License (MIT)
+
+Copyright (c) 2017 Facebook Inc.
+Copyright (c) 2017 Georgia Institute of Technology
+Copyright 2019 Google LLC
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/METADATA b/METADATA
new file mode 100644
index 0000000..dd36746
--- /dev/null
+++ b/METADATA
@@ -0,0 +1,18 @@
+name: "FP16"
+description:
+    "Header-only library for conversion to/from half-precision floating point "
+    "formats"
+
+third_party {
+  url {
+    type: HOMEPAGE
+    value: "https://github.com/Maratyszcza/FP16"
+  }
+  url {
+    type: GIT
+    value: "https://github.com/Maratyszcza/FP16"
+  }
+  version: "ba1d31f5eed2eb4a69e4dea3870a68c7c95f998f"
+  last_upgrade_date { year: 2020 month: 2 day: 3 }
+  license_type: NOTICE
+}
diff --git a/MODULE_LICENSE_MIT b/MODULE_LICENSE_MIT
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/MODULE_LICENSE_MIT
diff --git a/NOTICE b/NOTICE
new file mode 120000
index 0000000..7a694c9
--- /dev/null
+++ b/NOTICE
@@ -0,0 +1 @@
+LICENSE
\ No newline at end of file
diff --git a/OWNERS b/OWNERS
new file mode 100644
index 0000000..a2cc597
--- /dev/null
+++ b/OWNERS
@@ -0,0 +1,11 @@
+butlermichael@google.com
+dgross@google.com
+galarragas@google.com
+jeanluc@google.com
+levp@google.com
+maratek@google.com
+miaowang@google.com
+pszczepaniak@google.com
+slavash@google.com
+vddang@google.com
+xusongw@google.com
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..6cba158
--- /dev/null
+++ b/README.md
@@ -0,0 +1,20 @@
+# FP16
+Header-only library for conversion to/from half-precision floating point formats
+
+## Features
+
+- Supports IEEE and ARM alternative half-precision floating-point formats
+    - Properly converts infinities and NaNs
+    - Properly converts denormal numbers, even on systems without denormal support
+- Header-only library, no installation or build required
+- Compatible with C99 and C++11
+- Fully covered with unit tests and microbenchmarks
+
+## Acknowledgements
+
+[![HPC Garage logo](https://github.com/Maratyszcza/PeachPy/blob/master/logo/hpcgarage.png)](http://hpcgarage.org)
+[![Georgia Tech College of Computing logo](https://github.com/Maratyszcza/PeachPy/blob/master/logo/college-of-computing.gif)](http://www.cse.gatech.edu/)
+
+The library is developed by [Marat Dukhan](http://www.maratdukhan.com) of Georgia Tech. FP16 is a research project at [Richard Vuduc](http://vuduc.org)'s HPC Garage lab in the Georgia Institute of Technology, College of Computing, School of Computational Science and Engineering.
+
+This material is based upon work supported by the U.S. National Science Foundation (NSF) Award Number 1339745. Any opinions, findings and conclusions or recommendations expressed in this material are those of the authors and do not necessarily reflect those of NSF.
diff --git a/TEST_MAPPING b/TEST_MAPPING
new file mode 100644
index 0000000..b03f7be
--- /dev/null
+++ b/TEST_MAPPING
@@ -0,0 +1,25 @@
+{
+  "presubmit": [
+    {
+      "name": "Fp16AltFromFp32ValueTests"
+    },
+    {
+      "name": "Fp16AltToFp32BitsTests"
+    },
+    {
+      "name": "Fp16AltToFp32ValueTests"
+    },
+    {
+      "name": "Fp16BitcastsTests"
+    },
+    {
+      "name": "Fp16IEEEFromFp32ValueTests"
+    },
+    {
+      "name": "Fp16IEEEToFp32BitsTests"
+    },
+    {
+      "name": "Fp16IEEEToFp32ValueTests"
+    }
+ ]
+}
diff --git a/bench/alt-element.cc b/bench/alt-element.cc
new file mode 100644
index 0000000..f0598ab
--- /dev/null
+++ b/bench/alt-element.cc
@@ -0,0 +1,89 @@
+#include <benchmark/benchmark.h>
+
+#include <fp16.h>
+#ifndef EMSCRIPTEN
+	#include <fp16/psimd.h>
+#endif
+/* One step of a 16-bit xorshift-style generator: returns x after three shift-and-XOR updates. */
+static inline uint16_t next_xorshift16(uint16_t x) {
+	x ^= x >> 8;
+	x ^= x << 9; /* bits shifted above bit 15 are discarded when the result is stored back into uint16_t */
+	x ^= x >> 5;
+	return x;
+}
+/* One step of a 32-bit xorshift-style generator: returns x after three shift-and-XOR updates. */
+static inline uint32_t next_xorshift32(uint32_t x) {
+	x ^= x >> 13;
+	x ^= x << 17;
+	x ^= x >> 5;
+	return x;
+}
+#ifndef EMSCRIPTEN
+	PSIMD_INTRINSIC psimd_u16 next_xorshift16_psimd(psimd_u16 x) {
+		x ^= x >> psimd_splat_u16(8);
+		x ^= x << psimd_splat_u16(9);
+		x ^= x >> psimd_splat_u16(5);
+		return x;
+	}
+#endif
+
+/* Benchmark body: each iteration converts one 16-bit pattern via the library overload fp16_alt_to_fp32_bits(uint16_t). */
+static void fp16_alt_to_fp32_bits(benchmark::State& state) {
+	uint16_t fp16 = UINT16_C(0x7C00);
+	while (state.KeepRunning()) {
+		const uint32_t fp32 = fp16_alt_to_fp32_bits(fp16);
+		/* Step the input with next_xorshift16 so the next iteration converts a different bit pattern. */
+		fp16 = next_xorshift16(fp16);
+		benchmark::DoNotOptimize(fp32);
+	}
+}
+BENCHMARK(fp16_alt_to_fp32_bits);
+
+static void fp16_alt_to_fp32_value(benchmark::State& state) {
+	uint16_t fp16 = UINT16_C(0x7C00);
+	while (state.KeepRunning()) {
+		const float fp32 = fp16_alt_to_fp32_value(fp16);
+
+		fp16 = next_xorshift16(fp16);
+		benchmark::DoNotOptimize(fp32);
+	}
+}
+BENCHMARK(fp16_alt_to_fp32_value);
+
+#ifndef EMSCRIPTEN
+	static void fp16_alt_to_fp32_psimd(benchmark::State& state) {
+		psimd_u16 fp16 = (psimd_u16) { 0x7C00, 0x7C01, 0x7C02, 0x7C03 };
+		while (state.KeepRunning()) {
+			const psimd_f32 fp32 = fp16_alt_to_fp32_psimd(fp16);
+
+			fp16 = next_xorshift16_psimd(fp16);
+			benchmark::DoNotOptimize(fp32);
+		}
+	}
+	BENCHMARK(fp16_alt_to_fp32_psimd);
+
+	static void fp16_alt_to_fp32x2_psimd(benchmark::State& state) {
+		psimd_u16 fp16 =
+			(psimd_u16) { 0x7C00, 0x7C01, 0x7C02, 0x7C03, 0x7C04, 0x7C05, 0x7C06, 0x7C07 };
+		while (state.KeepRunning()) {
+			const psimd_f32x2 fp32 = fp16_alt_to_fp32x2_psimd(fp16);
+
+			fp16 = next_xorshift16_psimd(fp16);
+			benchmark::DoNotOptimize(fp32);
+		}
+	}
+	BENCHMARK(fp16_alt_to_fp32x2_psimd);
+#endif
+
+static void fp16_alt_from_fp32_value(benchmark::State& state) {
+	uint32_t fp32 = UINT32_C(0x7F800000);
+	while (state.KeepRunning()) {
+		const uint16_t fp16 = fp16_alt_from_fp32_value(fp32_from_bits(fp32));
+
+		fp32 = next_xorshift32(fp32);
+		benchmark::DoNotOptimize(fp16);
+	}
+}
+BENCHMARK(fp16_alt_from_fp32_value);
+
+BENCHMARK_MAIN();
diff --git a/bench/from-alt-array.cc b/bench/from-alt-array.cc
new file mode 100644
index 0000000..ceeb839
--- /dev/null
+++ b/bench/from-alt-array.cc
@@ -0,0 +1,175 @@
+#include <benchmark/benchmark.h>
+
+#include <fp16.h>
+#ifndef EMSCRIPTEN
+	#include <fp16/psimd.h>
+#endif
+
+#include <vector>
+#include <random>
+#include <chrono>
+#include <functional>
+#include <algorithm>
+
+#if defined(__ARM_NEON__) || defined(__aarch64__)
+	#include <arm_neon.h>
+#endif
+
+
+static void fp16_alt_to_fp32_bits(benchmark::State& state) {
+	const uint_fast32_t seed = std::chrono::system_clock::now().time_since_epoch().count();
+	auto rng = std::bind(std::uniform_int_distribution<uint16_t>(0, 0x7BFF), std::mt19937(seed));
+
+	std::vector<uint16_t> fp16(state.range(0));
+	std::vector<uint32_t> fp32(state.range(0));
+	std::generate(fp16.begin(), fp16.end(),
+		[&rng]{ return fp16_alt_from_fp32_value(rng()); });
+
+	while (state.KeepRunning()) {
+		uint16_t* input = fp16.data();
+		benchmark::DoNotOptimize(input);
+
+		uint32_t* output = fp32.data();
+		const size_t n = state.range(0);
+		for (size_t i = 0; i < n; i++) {
+			output[i] = fp16_alt_to_fp32_bits(input[i]);
+		}
+
+		benchmark::DoNotOptimize(output);
+	}
+	state.SetItemsProcessed(int64_t(state.iterations()) * int64_t(state.range(0)));
+}
+BENCHMARK(fp16_alt_to_fp32_bits)->RangeMultiplier(2)->Range(1<<10, 64<<20);
+
+static void fp16_alt_to_fp32_value(benchmark::State& state) {
+	const uint_fast32_t seed = std::chrono::system_clock::now().time_since_epoch().count();
+	auto rng = std::bind(std::uniform_int_distribution<uint16_t>(0, 0x7BFF), std::mt19937(seed));
+
+	std::vector<uint16_t> fp16(state.range(0));
+	std::vector<float> fp32(state.range(0));
+	std::generate(fp16.begin(), fp16.end(),
+		[&rng]{ return fp16_alt_from_fp32_value(rng()); });
+
+	while (state.KeepRunning()) {
+		uint16_t* input = fp16.data();
+		benchmark::DoNotOptimize(input);
+
+		float* output = fp32.data();
+		const size_t n = state.range(0);
+		for (size_t i = 0; i < n; i++) {
+			output[i] = fp16_alt_to_fp32_value(input[i]);
+		}
+
+		benchmark::DoNotOptimize(output);
+	}
+	state.SetItemsProcessed(int64_t(state.iterations()) * int64_t(state.range(0)));
+}
+BENCHMARK(fp16_alt_to_fp32_value)->RangeMultiplier(2)->Range(1<<10, 64<<20);
+
+#ifndef EMSCRIPTEN
+	static void fp16_alt_to_fp32_psimd(benchmark::State& state) {
+		const uint_fast32_t seed = std::chrono::system_clock::now().time_since_epoch().count();
+		auto rng = std::bind(std::uniform_int_distribution<uint16_t>(0, 0x7BFF), std::mt19937(seed));
+		/* Benchmark: converts state.range(0) half-precision inputs to float, four outputs per fp16_alt_to_fp32_psimd call. */
+		std::vector<uint16_t> fp16(state.range(0));
+		std::vector<float> fp32(state.range(0));
+		std::generate(fp16.begin(), fp16.end(),
+			[&rng]{ return fp16_alt_from_fp32_value(rng()); });
+		/* Inputs are built once, outside the timed loop: integers in [0, 0x7BFF] passed through fp16_alt_from_fp32_value. */
+		while (state.KeepRunning()) {
+			uint16_t* input = fp16.data();
+			benchmark::DoNotOptimize(input);
+			/* The loop bound is n - 4, so the last four elements are left for the lane-by-lane tail below. */
+			float* output = fp32.data();
+			const size_t n = state.range(0);
+			for (size_t i = 0; i < n - 4; i += 4) {
+				psimd_store_f32(&output[i],
+					fp16_alt_to_fp32_psimd(
+						psimd_load_u16(&input[i])));
+			}
+			const psimd_u16 last_vector = { input[n - 4], input[n - 3], input[n - 2], input[n - 1] };
+			psimd_store_f32(&output[n - 4],
+				fp16_alt_to_fp32_psimd(last_vector));
+			/* NOTE(review): the tail presumably avoids psimd_load_u16 reading past the end of input - confirm its load width in psimd.h. */
+			benchmark::DoNotOptimize(output);
+		}
+		state.SetItemsProcessed(int64_t(state.iterations()) * int64_t(state.range(0)));
+	}
+	BENCHMARK(fp16_alt_to_fp32_psimd)->RangeMultiplier(2)->Range(1<<10, 64<<20);
+
+	static void fp16_alt_to_fp32x2_psimd(benchmark::State& state) {
+		const uint_fast32_t seed = std::chrono::system_clock::now().time_since_epoch().count();
+		auto rng = std::bind(std::uniform_int_distribution<uint16_t>(0, 0x7BFF), std::mt19937(seed));
+
+		std::vector<uint16_t> fp16(state.range(0));
+		std::vector<float> fp32(state.range(0));
+		std::generate(fp16.begin(), fp16.end(),
+			[&rng]{ return fp16_alt_from_fp32_value(rng()); });
+
+		while (state.KeepRunning()) {
+			uint16_t* input = fp16.data();
+			benchmark::DoNotOptimize(input);
+
+			float* output = fp32.data();
+			const size_t n = state.range(0);
+			for (size_t i = 0; i < n; i += 8) {
+				const psimd_f32x2 data =
+					fp16_alt_to_fp32x2_psimd(
+						psimd_load_u16(&input[i]));
+				psimd_store_f32(&output[i], data.lo);
+				psimd_store_f32(&output[i + 4], data.hi);
+			}
+
+			benchmark::DoNotOptimize(output);
+		}
+		state.SetItemsProcessed(int64_t(state.iterations()) * int64_t(state.range(0)));
+	}
+	BENCHMARK(fp16_alt_to_fp32x2_psimd)->RangeMultiplier(2)->Range(1<<10, 64<<20);
+#endif
+
+#if defined(__ARM_NEON_FP) && (__ARM_NEON_FP & 0x2) || defined(__aarch64__)
+	/* Times NEON vcvt_f32_f16 over state.range(0) halves with the FP control register switched to Alternative FP16 mode. */
+	static void hardware_vcvt_f32_f16(benchmark::State& state) {
+		const uint_fast32_t seed = std::chrono::system_clock::now().time_since_epoch().count();
+		auto rng = std::bind(std::uniform_real_distribution<float>(-1.0f, 1.0f), std::mt19937(seed));
+
+		std::vector<uint16_t> fp16(state.range(0));
+		std::vector<float> fp32(state.range(0));
+		std::generate(fp16.begin(), fp16.end(),
+			[&rng]{ return fp16_ieee_from_fp32_value(rng()); });
+
+		while (state.KeepRunning()) {
+			uint16_t* input = fp16.data();
+			benchmark::DoNotOptimize(input);
+
+			float* output = fp32.data();
+			const size_t n = state.range(0);
+			#if defined(__aarch64__)
+				const unsigned int fpcr = __builtin_aarch64_get_fpcr();
+				/* Disable flush-to-zero (FZ, bit 24 = 0x01000000) and enable Alternative FP16 format (AHP, bit 26 = 0x04000000) */
+				__builtin_aarch64_set_fpcr((fpcr & 0xFEFFFFFFu) | 0x04000000u);
+			#else
+				unsigned int fpscr;
+				__asm__ __volatile__ ("VMRS %[fpscr], fpscr" : [fpscr] "=r" (fpscr));
+				/* Disable flush-to-zero (FZ, bit 24 = 0x01000000) and enable Alternative FP16 format (AHP, bit 26 = 0x04000000) */
+				__asm__ __volatile__ ("VMSR fpscr, %[fpscr]" :: [fpscr] "r" ((fpscr & 0xFEFFFFFFu) | 0x04000000u));
+			#endif
+			for (size_t i = 0; i < n; i += 4) {
+				vst1q_f32(&output[i],
+					vcvt_f32_f16(
+						(float16x4_t) vld1_u16(&input[i])));
+			}
+			#if defined(__aarch64__)
+				__builtin_aarch64_set_fpcr(fpcr); /* restore the value saved before the loop */
+			#else
+				__asm__ __volatile__ ("VMSR fpscr, %[fpscr]" :: [fpscr] "r" (fpscr)); /* restore the saved value */
+			#endif
+
+			benchmark::DoNotOptimize(output);
+		}
+		state.SetItemsProcessed(int64_t(state.iterations()) * int64_t(state.range(0)));
+	}
+	BENCHMARK(hardware_vcvt_f32_f16)->RangeMultiplier(2)->Range(1<<10, 64<<20);
+#endif
+
+BENCHMARK_MAIN();
diff --git a/bench/from-ieee-array.cc b/bench/from-ieee-array.cc
new file mode 100644
index 0000000..9c3094b
--- /dev/null
+++ b/bench/from-ieee-array.cc
@@ -0,0 +1,399 @@
+#include <benchmark/benchmark.h>
+
+#include <fp16.h>
+#ifndef EMSCRIPTEN
+	#include <fp16/psimd.h>
+#endif
+
+#include <vector>
+#include <random>
+#include <chrono>
+#include <functional>
+#include <algorithm>
+
+#if (defined(__i386__) || defined(__x86_64__)) && defined(__F16C__)
+	#include <immintrin.h>
+#endif
+
+#if defined(__ARM_NEON__) || defined(__aarch64__)
+	#include <arm_neon.h>
+#endif
+
+#ifdef FP16_COMPARATIVE_BENCHMARKS
+	#include <third-party/THHalf.h>
+	#include <third-party/npy-halffloat.h>
+	#include <third-party/eigen-half.h>
+	#include <third-party/float16-compressor.h>
+	#include <third-party/half.hpp>
+#endif
+
+
+static void fp16_ieee_to_fp32_bits(benchmark::State& state) {
+	const uint_fast32_t seed = std::chrono::system_clock::now().time_since_epoch().count();
+	auto rng = std::bind(std::uniform_real_distribution<float>(-1.0f, 1.0f), std::mt19937(seed));
+
+	std::vector<uint16_t> fp16(state.range(0));
+	std::vector<uint32_t> fp32(state.range(0));
+	std::generate(fp16.begin(), fp16.end(),
+		[&rng]{ return fp16_ieee_from_fp32_value(rng()); });
+
+	while (state.KeepRunning()) {
+		uint16_t* input = fp16.data();
+		benchmark::DoNotOptimize(input);
+
+		uint32_t* output = fp32.data();
+		const size_t n = state.range(0);
+		for (size_t i = 0; i < n; i++) {
+			output[i] = fp16_ieee_to_fp32_bits(input[i]);
+		}
+
+		benchmark::DoNotOptimize(output);
+	}
+	state.SetItemsProcessed(int64_t(state.iterations()) * int64_t(state.range(0)));
+}
+BENCHMARK(fp16_ieee_to_fp32_bits)->RangeMultiplier(2)->Range(1<<10, 64<<20);
+
+static void fp16_ieee_to_fp32_value(benchmark::State& state) {
+	const uint_fast32_t seed = std::chrono::system_clock::now().time_since_epoch().count();
+	auto rng = std::bind(std::uniform_real_distribution<float>(-1.0f, 1.0f), std::mt19937(seed));
+
+	std::vector<uint16_t> fp16(state.range(0));
+	std::vector<float> fp32(state.range(0));
+	std::generate(fp16.begin(), fp16.end(),
+		[&rng]{ return fp16_ieee_from_fp32_value(rng()); });
+
+	while (state.KeepRunning()) {
+		uint16_t* input = fp16.data();
+		benchmark::DoNotOptimize(input);
+
+		float* output = fp32.data();
+		const size_t n = state.range(0);
+		for (size_t i = 0; i < n; i++) {
+			output[i] = fp16_ieee_to_fp32_value(input[i]);
+		}
+
+		benchmark::DoNotOptimize(output);
+	}
+	state.SetItemsProcessed(int64_t(state.iterations()) * int64_t(state.range(0)));
+}
+BENCHMARK(fp16_ieee_to_fp32_value)->RangeMultiplier(2)->Range(1<<10, 64<<20);
+
+#ifndef EMSCRIPTEN
+	static void fp16_ieee_to_fp32_psimd(benchmark::State& state) {
+		const uint_fast32_t seed = std::chrono::system_clock::now().time_since_epoch().count();
+		auto rng = std::bind(std::uniform_real_distribution<float>(-1.0f, 1.0f), std::mt19937(seed));
+
+		std::vector<uint16_t> fp16(state.range(0));
+		std::vector<float> fp32(state.range(0));
+		std::generate(fp16.begin(), fp16.end(),
+			[&rng]{ return fp16_ieee_from_fp32_value(rng()); });
+
+		while (state.KeepRunning()) {
+			uint16_t* input = fp16.data();
+			benchmark::DoNotOptimize(input);
+
+			float* output = fp32.data();
+			const size_t n = state.range(0);
+			for (size_t i = 0; i < n - 4; i += 4) {
+				psimd_store_f32(&output[i],
+					fp16_ieee_to_fp32_psimd(
+						psimd_load_u16(&input[i])));
+			}
+			const psimd_u16 last_vector = { input[n - 4], input[n - 3], input[n - 2], input[n - 1] };
+			psimd_store_f32(&output[n - 4],
+				fp16_ieee_to_fp32_psimd(last_vector));
+
+			benchmark::DoNotOptimize(output);
+		}
+		state.SetItemsProcessed(int64_t(state.iterations()) * int64_t(state.range(0)));
+	}
+	BENCHMARK(fp16_ieee_to_fp32_psimd)->RangeMultiplier(2)->Range(1<<10, 64<<20);
+
+	static void fp16_ieee_to_fp32x2_psimd(benchmark::State& state) {
+		const uint_fast32_t seed = std::chrono::system_clock::now().time_since_epoch().count();
+		auto rng = std::bind(std::uniform_real_distribution<float>(-1.0f, 1.0f), std::mt19937(seed));
+
+		std::vector<uint16_t> fp16(state.range(0));
+		std::vector<float> fp32(state.range(0));
+		std::generate(fp16.begin(), fp16.end(),
+			[&rng]{ return fp16_ieee_from_fp32_value(rng()); });
+
+		while (state.KeepRunning()) {
+			uint16_t* input = fp16.data();
+			benchmark::DoNotOptimize(input);
+
+			float* output = fp32.data();
+			const size_t n = state.range(0);
+			for (size_t i = 0; i < n; i += 8) {
+				const psimd_f32x2 data =
+					fp16_ieee_to_fp32x2_psimd(
+						psimd_load_u16(&input[i]));
+				psimd_store_f32(&output[i], data.lo);
+				psimd_store_f32(&output[i + 4], data.hi);
+			}
+
+			benchmark::DoNotOptimize(output);
+		}
+		state.SetItemsProcessed(int64_t(state.iterations()) * int64_t(state.range(0)));
+	}
+	BENCHMARK(fp16_ieee_to_fp32x2_psimd)->RangeMultiplier(2)->Range(1<<10, 64<<20);
+#endif
+
+#if (defined(__i386__) || defined(__x86_64__)) && defined(__F16C__)
+	static void hardware_mm_cvtph_ps(benchmark::State& state) {
+		const uint_fast32_t seed = std::chrono::system_clock::now().time_since_epoch().count();
+		auto rng = std::bind(std::uniform_real_distribution<float>(-1.0f, 1.0f), std::mt19937(seed));
+
+		std::vector<uint16_t> fp16(state.range(0));
+		std::vector<float> fp32(state.range(0));
+		std::generate(fp16.begin(), fp16.end(),
+			[&rng]{ return fp16_ieee_from_fp32_value(rng()); });
+
+		while (state.KeepRunning()) {
+			uint16_t* input = fp16.data();
+			benchmark::DoNotOptimize(input);
+
+			float* output = fp32.data();
+			const size_t n = state.range(0);
+			for (size_t i = 0; i < n; i += 4) {
+				_mm_storeu_ps(&output[i],
+					_mm_cvtph_ps(
+						_mm_loadl_epi64(static_cast<const __m128i*>(static_cast<const void*>(&input[i])))));
+			}
+
+			benchmark::DoNotOptimize(output);
+		}
+		state.SetItemsProcessed(int64_t(state.iterations()) * int64_t(state.range(0)));
+	}
+	BENCHMARK(hardware_mm_cvtph_ps)->RangeMultiplier(2)->Range(1<<10, 64<<20);
+
+	static void hardware_mm256_cvtph_ps(benchmark::State& state) {
+		const uint_fast32_t seed = std::chrono::system_clock::now().time_since_epoch().count();
+		auto rng = std::bind(std::uniform_real_distribution<float>(-1.0f, 1.0f), std::mt19937(seed));
+
+		std::vector<uint16_t> fp16(state.range(0));
+		std::vector<float> fp32(state.range(0));
+		std::generate(fp16.begin(), fp16.end(),
+			[&rng]{ return fp16_ieee_from_fp32_value(rng()); });
+
+		while (state.KeepRunning()) {
+			uint16_t* input = fp16.data();
+			benchmark::DoNotOptimize(input);
+
+			float* output = fp32.data();
+			const size_t n = state.range(0);
+			for (size_t i = 0; i < n; i += 8) {
+				_mm256_storeu_ps(&output[i],
+					_mm256_cvtph_ps(
+						_mm_loadu_si128(static_cast<const __m128i*>(static_cast<const void*>(&input[i])))));
+			}
+
+			benchmark::DoNotOptimize(output);
+		}
+		state.SetItemsProcessed(int64_t(state.iterations()) * int64_t(state.range(0)));
+	}
+	BENCHMARK(hardware_mm256_cvtph_ps)->RangeMultiplier(2)->Range(1<<10, 64<<20);
+#endif
+
+#if defined(__ARM_NEON_FP) && (__ARM_NEON_FP & 0x2) || defined(__aarch64__)
+	/* Times NEON vcvt_f32_f16 over state.range(0) halves with flush-to-zero and Alternative FP16 mode both switched off. */
+	static void hardware_vcvt_f32_f16(benchmark::State& state) {
+		const uint_fast32_t seed = std::chrono::system_clock::now().time_since_epoch().count();
+		auto rng = std::bind(std::uniform_real_distribution<float>(-1.0f, 1.0f), std::mt19937(seed));
+
+		std::vector<uint16_t> fp16(state.range(0));
+		std::vector<float> fp32(state.range(0));
+		std::generate(fp16.begin(), fp16.end(),
+			[&rng]{ return fp16_ieee_from_fp32_value(rng()); });
+
+		while (state.KeepRunning()) {
+			uint16_t* input = fp16.data();
+			benchmark::DoNotOptimize(input);
+
+			float* output = fp32.data();
+			const size_t n = state.range(0);
+			#if defined(__aarch64__)
+				const unsigned int fpcr = __builtin_aarch64_get_fpcr();
+				/* Disable flush-to-zero (FZ, bit 24) and Alternative FP16 format (AHP, bit 26): clear 0x05000000 */
+				__builtin_aarch64_set_fpcr(fpcr & 0xFAFFFFFFu);
+			#else
+				unsigned int fpscr;
+				__asm__ __volatile__ ("VMRS %[fpscr], fpscr" : [fpscr] "=r" (fpscr));
+				/* Disable flush-to-zero (FZ, bit 24) and Alternative FP16 format (AHP, bit 26): clear 0x05000000 */
+				__asm__ __volatile__ ("VMSR fpscr, %[fpscr]" :: [fpscr] "r" (fpscr & 0xFAFFFFFFu));
+			#endif
+			for (size_t i = 0; i < n; i += 4) {
+				vst1q_f32(&output[i],
+					vcvt_f32_f16(
+						(float16x4_t) vld1_u16(&input[i])));
+			}
+			#if defined(__aarch64__)
+				__builtin_aarch64_set_fpcr(fpcr); /* restore the value saved before the loop */
+			#else
+				__asm__ __volatile__ ("VMSR fpscr, %[fpscr]" :: [fpscr] "r" (fpscr)); /* restore the saved value */
+			#endif
+
+			benchmark::DoNotOptimize(output);
+		}
+		state.SetItemsProcessed(int64_t(state.iterations()) * int64_t(state.range(0)));
+	}
+	BENCHMARK(hardware_vcvt_f32_f16)->RangeMultiplier(2)->Range(1<<10, 64<<20);
+#endif
+
+#ifdef FP16_COMPARATIVE_BENCHMARKS
+	/* Array benchmark: FP16 -> FP32 through TH_halfbits2float, one element per call. */
+	static void TH_halfbits2float(benchmark::State& state) {
+		const uint_fast32_t rng_seed = std::chrono::system_clock::now().time_since_epoch().count();
+		auto next_float = std::bind(std::uniform_real_distribution<float>(-1.0f, 1.0f), std::mt19937(rng_seed));
+
+		std::vector<uint16_t> fp16(state.range(0));
+		std::vector<float> fp32(state.range(0));
+		std::generate(fp16.begin(), fp16.end(), [&next_float]{ return fp16_ieee_from_fp32_value(next_float()); });
+
+		while (state.KeepRunning()) {
+			uint16_t* src = fp16.data();
+			benchmark::DoNotOptimize(src);
+
+			float* dst = fp32.data();
+			const size_t count = state.range(0);
+			for (size_t k = 0; k < count; ++k) {
+				TH_halfbits2float(&src[k], &dst[k]);
+			}
+
+			benchmark::DoNotOptimize(dst);
+		}
+		state.SetItemsProcessed(static_cast<int64_t>(state.iterations()) * static_cast<int64_t>(state.range(0)));
+	}
+	BENCHMARK(TH_halfbits2float)->RangeMultiplier(2)->Range(1<<10, 64<<20);
+
+	/* Array benchmark: FP16 bits -> FP32 bits through npy_halfbits_to_floatbits, one element per call. */
+	static void npy_halfbits_to_floatbits(benchmark::State& state) {
+		const uint_fast32_t rng_seed = std::chrono::system_clock::now().time_since_epoch().count();
+		auto next_float = std::bind(std::uniform_real_distribution<float>(-1.0f, 1.0f), std::mt19937(rng_seed));
+
+		std::vector<uint16_t> fp16(state.range(0));
+		std::vector<uint32_t> fp32(state.range(0));
+		std::generate(fp16.begin(), fp16.end(), [&next_float]{ return fp16_ieee_from_fp32_value(next_float()); });
+
+		while (state.KeepRunning()) {
+			uint16_t* src = fp16.data();
+			benchmark::DoNotOptimize(src);
+
+			uint32_t* dst = fp32.data();
+			const size_t count = state.range(0);
+			for (size_t k = 0; k < count; ++k) {
+				dst[k] = npy_halfbits_to_floatbits(src[k]);
+			}
+
+			benchmark::DoNotOptimize(dst);
+		}
+		state.SetItemsProcessed(static_cast<int64_t>(state.iterations()) * static_cast<int64_t>(state.range(0)));
+	}
+	BENCHMARK(npy_halfbits_to_floatbits)->RangeMultiplier(2)->Range(1<<10, 64<<20);
+
+	/* Array benchmark: FP16 -> FP32 through Eigen::half_impl::half_to_float, one element per call. */
+	static void Eigen_half_to_float(benchmark::State& state) {
+		const uint_fast32_t rng_seed = std::chrono::system_clock::now().time_since_epoch().count();
+		auto next_float = std::bind(std::uniform_real_distribution<float>(-1.0f, 1.0f), std::mt19937(rng_seed));
+
+		std::vector<uint16_t> fp16(state.range(0));
+		std::vector<float> fp32(state.range(0));
+		std::generate(fp16.begin(), fp16.end(), [&next_float]{ return fp16_ieee_from_fp32_value(next_float()); });
+
+		while (state.KeepRunning()) {
+			uint16_t* src = fp16.data();
+			benchmark::DoNotOptimize(src);
+
+			float* dst = fp32.data();
+			const size_t count = state.range(0);
+			for (size_t k = 0; k < count; ++k) {
+				/* Wrap the raw bits in Eigen's half type, then widen to float. */
+				dst[k] = Eigen::half_impl::half_to_float(
+					Eigen::half_impl::raw_uint16_to_half(src[k]));
+			}
+
+			benchmark::DoNotOptimize(dst);
+		}
+		state.SetItemsProcessed(static_cast<int64_t>(state.iterations()) * static_cast<int64_t>(state.range(0)));
+	}
+	BENCHMARK(Eigen_half_to_float)->RangeMultiplier(2)->Range(1<<10, 64<<20);
+
+	/* Array benchmark: FP16 -> FP32 through Float16Compressor::decompress, one element per call. */
+	static void Float16Compressor_decompress(benchmark::State& state) {
+		const uint_fast32_t rng_seed = std::chrono::system_clock::now().time_since_epoch().count();
+		auto next_float = std::bind(std::uniform_real_distribution<float>(-1.0f, 1.0f), std::mt19937(rng_seed));
+
+		std::vector<uint16_t> fp16(state.range(0));
+		std::vector<float> fp32(state.range(0));
+		std::generate(fp16.begin(), fp16.end(), [&next_float]{ return fp16_ieee_from_fp32_value(next_float()); });
+
+		while (state.KeepRunning()) {
+			uint16_t* src = fp16.data();
+			benchmark::DoNotOptimize(src);
+
+			float* dst = fp32.data();
+			const size_t count = state.range(0);
+			for (size_t k = 0; k < count; ++k) {
+				dst[k] = Float16Compressor::decompress(src[k]);
+			}
+
+			benchmark::DoNotOptimize(dst);
+		}
+		state.SetItemsProcessed(static_cast<int64_t>(state.iterations()) * static_cast<int64_t>(state.range(0)));
+	}
+	BENCHMARK(Float16Compressor_decompress)->RangeMultiplier(2)->Range(1<<10, 64<<20);
+
+	/* Array benchmark: FP16 -> FP32 through half_float's half2float_impl, true_type overload. */
+	static void half_float_detail_half2float_table(benchmark::State& state) {
+		const uint_fast32_t rng_seed = std::chrono::system_clock::now().time_since_epoch().count();
+		auto next_float = std::bind(std::uniform_real_distribution<float>(-1.0f, 1.0f), std::mt19937(rng_seed));
+
+		std::vector<uint16_t> fp16(state.range(0));
+		std::vector<float> fp32(state.range(0));
+		std::generate(fp16.begin(), fp16.end(), [&next_float]{ return fp16_ieee_from_fp32_value(next_float()); });
+
+		while (state.KeepRunning()) {
+			uint16_t* src = fp16.data();
+			benchmark::DoNotOptimize(src);
+
+			float* dst = fp32.data();
+			const size_t count = state.range(0);
+			for (size_t k = 0; k < count; ++k) {
+				/* The tag argument selects the overload under test. */
+				dst[k] = half_float::detail::half2float_impl(src[k], half_float::detail::true_type());
+			}
+
+			benchmark::DoNotOptimize(dst);
+		}
+		state.SetItemsProcessed(static_cast<int64_t>(state.iterations()) * static_cast<int64_t>(state.range(0)));
+	}
+	BENCHMARK(half_float_detail_half2float_table)->RangeMultiplier(2)->Range(1<<10, 64<<20);
+
+	/* Array benchmark: FP16 -> FP32 through half_float's half2float_impl, false_type overload. */
+	static void half_float_detail_half2float_branch(benchmark::State& state) {
+		const uint_fast32_t rng_seed = std::chrono::system_clock::now().time_since_epoch().count();
+		auto next_float = std::bind(std::uniform_real_distribution<float>(-1.0f, 1.0f), std::mt19937(rng_seed));
+
+		std::vector<uint16_t> fp16(state.range(0));
+		std::vector<float> fp32(state.range(0));
+		std::generate(fp16.begin(), fp16.end(), [&next_float]{ return fp16_ieee_from_fp32_value(next_float()); });
+
+		while (state.KeepRunning()) {
+			uint16_t* src = fp16.data();
+			benchmark::DoNotOptimize(src);
+
+			float* dst = fp32.data();
+			const size_t count = state.range(0);
+			for (size_t k = 0; k < count; ++k) {
+				/* The tag argument selects the overload under test. */
+				dst[k] = half_float::detail::half2float_impl(src[k], half_float::detail::false_type());
+			}
+
+			benchmark::DoNotOptimize(dst);
+		}
+		state.SetItemsProcessed(static_cast<int64_t>(state.iterations()) * static_cast<int64_t>(state.range(0)));
+	}
+	BENCHMARK(half_float_detail_half2float_branch)->RangeMultiplier(2)->Range(1<<10, 64<<20);
+#endif
+
+BENCHMARK_MAIN();
diff --git a/bench/ieee-element.cc b/bench/ieee-element.cc
new file mode 100644
index 0000000..a9fbf56
--- /dev/null
+++ b/bench/ieee-element.cc
@@ -0,0 +1,272 @@
+#include <benchmark/benchmark.h>
+
+#include <fp16.h>
+#ifndef EMSCRIPTEN
+	#include <fp16/psimd.h>
+#endif
+
+#if (defined(__i386__) || defined(__x86_64__)) && defined(__F16C__)
+	#include <immintrin.h>
+#endif
+
+#ifdef FP16_COMPARATIVE_BENCHMARKS
+	#include <third-party/THHalf.h>
+	#include <third-party/npy-halffloat.h>
+	#include <third-party/eigen-half.h>
+	#include <third-party/float16-compressor.h>
+	#include <third-party/half.hpp>
+#endif
+
+static inline uint16_t next_xorshift16(uint16_t x) {  /* one xorshift step: >>8, <<9, >>5 */
+	x = static_cast<uint16_t>(x ^ (x >> 8));
+	x = static_cast<uint16_t>(x ^ (x << 9));
+	x = static_cast<uint16_t>(x ^ (x >> 5));
+	return x;
+}
+
+static inline uint32_t next_xorshift32(uint32_t x) {  /* one xorshift step: >>13, <<17, >>5 */
+	x = x ^ (x >> 13);
+	x = x ^ (x << 17);
+	x = x ^ (x >> 5);
+	return x;
+}
+#ifndef EMSCRIPTEN
+	PSIMD_INTRINSIC psimd_u16 next_xorshift16_psimd(psimd_u16 x) {  /* lane-wise >>8, <<9, >>5 step */
+		x = x ^ (x >> psimd_splat_u16(8));
+		x = x ^ (x << psimd_splat_u16(9));
+		x = x ^ (x >> psimd_splat_u16(5));
+		return x;
+	}
+#endif
+
+
+/* Conversion from IEEE FP16 to IEEE FP32 */
+
+/* Single-value benchmark of fp16_ieee_to_fp32_bits; each input is derived from the last via next_xorshift16. */
+static void fp16_ieee_to_fp32_bits(benchmark::State& state) {
+	uint16_t half_bits = UINT16_C(0x7C00);
+	while (state.KeepRunning()) {
+		const uint32_t float_bits = fp16_ieee_to_fp32_bits(half_bits);
+		half_bits = next_xorshift16(half_bits);
+		benchmark::DoNotOptimize(float_bits);
+	}
+}
+BENCHMARK(fp16_ieee_to_fp32_bits);
+
+/* Single-value benchmark of fp16_ieee_to_fp32_value; each input is derived from the last via next_xorshift16. */
+static void fp16_ieee_to_fp32_value(benchmark::State& state) {
+	uint16_t half_bits = UINT16_C(0x7C00);
+	while (state.KeepRunning()) {
+		const float widened = fp16_ieee_to_fp32_value(half_bits);
+		half_bits = next_xorshift16(half_bits);
+		benchmark::DoNotOptimize(widened);
+	}
+}
+BENCHMARK(fp16_ieee_to_fp32_value);
+
+#ifndef EMSCRIPTEN
+	/* Single-vector benchmark of fp16_ieee_to_fp32_psimd; inputs advance via next_xorshift16_psimd. */
+	static void fp16_ieee_to_fp32_psimd(benchmark::State& state) {
+		psimd_u16 halves = (psimd_u16) { 0x7C00, 0x7C01, 0x7C02, 0x7C03 };
+		while (state.KeepRunning()) {
+			const psimd_f32 widened = fp16_ieee_to_fp32_psimd(halves);
+			halves = next_xorshift16_psimd(halves);
+			benchmark::DoNotOptimize(widened);
+		}
+	}
+	BENCHMARK(fp16_ieee_to_fp32_psimd);
+
+	/* Single-vector benchmark of fp16_ieee_to_fp32x2_psimd; inputs advance via next_xorshift16_psimd. */
+	static void fp16_ieee_to_fp32x2_psimd(benchmark::State& state) {
+		psimd_u16 halves =
+			(psimd_u16) { 0x7C00, 0x7C01, 0x7C02, 0x7C03, 0x7C04, 0x7C05, 0x7C06, 0x7C07 };
+		while (state.KeepRunning()) {
+			const psimd_f32x2 widened = fp16_ieee_to_fp32x2_psimd(halves);
+			halves = next_xorshift16_psimd(halves);
+			benchmark::DoNotOptimize(widened);
+		}
+	}
+	BENCHMARK(fp16_ieee_to_fp32x2_psimd);
+#endif
+
+#ifdef FP16_COMPARATIVE_BENCHMARKS
+	/* Single-value benchmark of TH_halfbits2float; each input is derived from the last via next_xorshift16. */
+	static void TH_halfbits2float(benchmark::State& state) {
+		uint16_t half_bits = UINT16_C(0x7C00);
+		while (state.KeepRunning()) {
+			float widened;
+			TH_halfbits2float(&half_bits, &widened);
+			half_bits = next_xorshift16(half_bits);
+			benchmark::DoNotOptimize(widened);
+		}
+	}
+	BENCHMARK(TH_halfbits2float);
+
+	/* Single-value benchmark of npy_halfbits_to_floatbits; inputs advance via next_xorshift16. */
+	static void npy_halfbits_to_floatbits(benchmark::State& state) {
+		uint16_t half_bits = UINT16_C(0x7C00);
+		while (state.KeepRunning()) {
+			const uint32_t float_bits = npy_halfbits_to_floatbits(half_bits);
+			half_bits = next_xorshift16(half_bits);
+			benchmark::DoNotOptimize(float_bits);
+		}
+	}
+	BENCHMARK(npy_halfbits_to_floatbits);
+
+	/* Single-value benchmark of Eigen::half_impl::half_to_float; inputs advance via next_xorshift16. */
+	static void Eigen_half_to_float(benchmark::State& state) {
+		uint16_t half_bits = UINT16_C(0x7C00);
+		while (state.KeepRunning()) {
+			/* Wrap the raw bits in Eigen's half type, then widen to float. */
+			const float widened = Eigen::half_impl::half_to_float(
+				Eigen::half_impl::raw_uint16_to_half(half_bits));
+			half_bits = next_xorshift16(half_bits);
+			benchmark::DoNotOptimize(widened);
+		}
+	}
+	BENCHMARK(Eigen_half_to_float);
+
+	/* Single-value benchmark of Float16Compressor::decompress; inputs advance via next_xorshift16. */
+	static void Float16Compressor_decompress(benchmark::State& state) {
+		uint16_t half_bits = UINT16_C(0x7C00);
+		while (state.KeepRunning()) {
+			const float widened = Float16Compressor::decompress(half_bits);
+			half_bits = next_xorshift16(half_bits);
+			benchmark::DoNotOptimize(widened);
+		}
+	}
+	BENCHMARK(Float16Compressor_decompress);
+
+	/* Single-value benchmark of half_float's half2float_impl, true_type overload. */
+	static void half_float_detail_half2float_table(benchmark::State& state) {
+		uint16_t half_bits = UINT16_C(0x7C00);
+		while (state.KeepRunning()) {
+			/* The tag argument selects the overload under test. */
+			const float widened = half_float::detail::half2float_impl(
+				half_bits, half_float::detail::true_type());
+			half_bits = next_xorshift16(half_bits);
+			benchmark::DoNotOptimize(widened);
+		}
+	}
+	BENCHMARK(half_float_detail_half2float_table);
+
+	/* Single-value benchmark of half_float's half2float_impl, false_type overload. */
+	static void half_float_detail_half2float_branch(benchmark::State& state) {
+		uint16_t half_bits = UINT16_C(0x7C00);
+		while (state.KeepRunning()) {
+			/* The tag argument selects the overload under test. */
+			const float widened = half_float::detail::half2float_impl(
+				half_bits, half_float::detail::false_type());
+			half_bits = next_xorshift16(half_bits);
+			benchmark::DoNotOptimize(widened);
+		}
+	}
+	BENCHMARK(half_float_detail_half2float_branch);
+#endif
+
+/* Conversion from IEEE FP32 to IEEE FP16 */
+
+/* Single-value benchmark of fp16_ieee_from_fp32_value; input bit patterns advance via next_xorshift32. */
+static void fp16_ieee_from_fp32_value(benchmark::State& state) {
+	uint32_t float_bits = UINT32_C(0x7F800000);
+	while (state.KeepRunning()) {
+		const uint16_t narrowed = fp16_ieee_from_fp32_value(fp32_from_bits(float_bits));
+		float_bits = next_xorshift32(float_bits);
+		benchmark::DoNotOptimize(narrowed);
+	}
+}
+BENCHMARK(fp16_ieee_from_fp32_value);
+
+#if (defined(__i386__) || defined(__x86_64__)) && defined(__F16C__)
+	/* Single-value benchmark of FP32 -> FP16 conversion through the F16C _mm_cvtps_ph intrinsic. */
+	static void fp16_ieee_from_fp32_hardware(benchmark::State& state) {
+		uint32_t fp32 = UINT32_C(0x7F800000);
+		while (state.KeepRunning()) {
+			/* Reinterpret the bit pattern as a float; passing the integer itself would convert its numeric value. */
+			const uint16_t fp16 = static_cast<uint16_t>(_mm_cvtsi128_si32(_mm_cvtps_ph(_mm_set_ss(fp32_from_bits(fp32)), _MM_FROUND_CUR_DIRECTION)));
+			fp32 = next_xorshift32(fp32);
+			benchmark::DoNotOptimize(fp16);
+		}
+	}
+	BENCHMARK(fp16_ieee_from_fp32_hardware);
+#endif
+
+#ifdef FP16_COMPARATIVE_BENCHMARKS
+	/* Single-value benchmark of TH_float2halfbits; input bit patterns advance via next_xorshift32. */
+	static void TH_float2halfbits(benchmark::State& state) {
+		uint32_t float_bits = UINT32_C(0x7F800000);
+		while (state.KeepRunning()) {
+			uint16_t narrowed;
+			float as_float = fp32_from_bits(float_bits);
+			TH_float2halfbits(&as_float, &narrowed);
+			/* Advance to the next pseudo-random bit pattern. */
+			float_bits = next_xorshift32(float_bits);
+			benchmark::DoNotOptimize(narrowed);
+		}
+	}
+
+	/* Single-value benchmark of npy_floatbits_to_halfbits; input bit patterns advance via next_xorshift32. */
+	static void npy_floatbits_to_halfbits(benchmark::State& state) {
+		uint32_t float_bits = UINT32_C(0x7F800000);
+		while (state.KeepRunning()) {
+			const uint16_t narrowed = npy_floatbits_to_halfbits(float_bits);
+			float_bits = next_xorshift32(float_bits);
+			benchmark::DoNotOptimize(narrowed);
+		}
+	}
+	BENCHMARK(npy_floatbits_to_halfbits);
+
+	/* Single-value benchmark of Eigen::half_impl::float_to_half_rtne; inputs advance via next_xorshift32. */
+	static void Eigen_float_to_half_rtne(benchmark::State& state) {
+		uint32_t float_bits = UINT32_C(0x7F800000);
+		while (state.KeepRunning()) {
+			const float as_float = fp32_from_bits(float_bits);
+			const Eigen::half_impl::__half narrowed =
+				Eigen::half_impl::float_to_half_rtne(as_float);
+			float_bits = next_xorshift32(float_bits);
+			benchmark::DoNotOptimize(narrowed);
+		}
+	}
+	BENCHMARK(Eigen_float_to_half_rtne);
+
+	/* Single-value benchmark of Float16Compressor::compress; input bit patterns advance via next_xorshift32. */
+	static void Float16Compressor_compress(benchmark::State& state) {
+		uint32_t float_bits = UINT32_C(0x7F800000);
+		while (state.KeepRunning()) {
+			const uint16_t narrowed = Float16Compressor::compress(fp32_from_bits(float_bits));
+			float_bits = next_xorshift32(float_bits);
+			benchmark::DoNotOptimize(narrowed);
+		}
+	}
+	BENCHMARK(Float16Compressor_compress);
+
+	/* Single-value benchmark of half_float's float2half_impl (round-to-nearest), true_type overload. */
+	static void half_float_detail_float2half_table(benchmark::State& state) {
+		uint32_t float_bits = UINT32_C(0x7F800000);
+		while (state.KeepRunning()) {
+			const float as_float = fp32_from_bits(float_bits);
+			/* The tag argument selects the overload under test. */
+			const uint16_t narrowed = half_float::detail::float2half_impl<std::round_to_nearest>(
+				as_float, half_float::detail::true_type());
+			float_bits = next_xorshift32(float_bits);
+			benchmark::DoNotOptimize(narrowed);
+		}
+	}
+	BENCHMARK(half_float_detail_float2half_table);
+
+	/* Single-value benchmark of half_float's float2half_impl (round-to-nearest), false_type overload. */
+	static void half_float_detail_float2half_branch(benchmark::State& state) {
+		uint32_t float_bits = UINT32_C(0x7F800000);
+		while (state.KeepRunning()) {
+			const float as_float = fp32_from_bits(float_bits);
+			/* The tag argument selects the overload under test. */
+			const uint16_t narrowed = half_float::detail::float2half_impl<std::round_to_nearest>(
+				as_float, half_float::detail::false_type());
+			float_bits = next_xorshift32(float_bits);
+			benchmark::DoNotOptimize(narrowed);
+		}
+	}
+	BENCHMARK(half_float_detail_float2half_branch);
+#endif
+
+BENCHMARK_MAIN();
diff --git a/bench/to-alt-array.cc b/bench/to-alt-array.cc
new file mode 100644
index 0000000..8ef4f19
--- /dev/null
+++ b/bench/to-alt-array.cc
@@ -0,0 +1,91 @@
+#include <benchmark/benchmark.h>
+
+#include <fp16.h>
+#ifndef EMSCRIPTEN
+	#include <fp16/psimd.h>
+#endif
+
+#include <vector>
+#include <random>
+#include <chrono>
+#include <functional>
+#include <algorithm>
+
+#if (defined(__i386__) || defined(__x86_64__)) && defined(__F16C__)
+	#include <immintrin.h>
+#endif
+
+#if defined(__ARM_NEON__) || defined(__aarch64__)
+	#include <arm_neon.h>
+#endif
+
+
+/* Array benchmark: FP32 -> alternative-format FP16 through the scalar fp16_alt_from_fp32_value routine. */
+static void fp16_alt_from_fp32_value(benchmark::State& state) {
+	const uint_fast32_t rng_seed = std::chrono::system_clock::now().time_since_epoch().count();
+	auto next_float = std::bind(std::uniform_real_distribution<float>(-1.0f, 1.0f), std::mt19937(rng_seed));
+	std::vector<float> fp32(state.range(0));
+	std::vector<uint16_t> fp16(state.range(0));
+	std::generate(fp32.begin(), fp32.end(), std::ref(next_float));
+
+	while (state.KeepRunning()) {
+		float* src = fp32.data();
+		benchmark::DoNotOptimize(src);
+
+		uint16_t* dst = fp16.data();
+		const size_t count = state.range(0);
+		for (size_t k = 0; k < count; ++k) {
+			dst[k] = fp16_alt_from_fp32_value(src[k]);
+		}
+
+		benchmark::DoNotOptimize(dst);
+	}
+	state.SetItemsProcessed(static_cast<int64_t>(state.iterations()) * static_cast<int64_t>(state.range(0)));
+}
+BENCHMARK(fp16_alt_from_fp32_value)->RangeMultiplier(2)->Range(1<<10, 64<<20);
+
+#if defined(__ARM_NEON_FP) && (__ARM_NEON_FP & 0x2) || defined(__aarch64__)
+	/* Array benchmark: FP32 -> alternative-format FP16 with NEON vcvt_f16_f32, 4 elements per step. */
+	static void hardware_vcvt_f16_f32(benchmark::State& state) {
+		const uint_fast32_t seed = std::chrono::system_clock::now().time_since_epoch().count();
+		auto rng = std::bind(std::uniform_real_distribution<float>(-1.0f, 1.0f), std::mt19937(seed));
+		std::vector<float> fp32(state.range(0));
+		std::vector<uint16_t> fp16(state.range(0));
+		std::generate(fp32.begin(), fp32.end(), std::ref(rng));
+
+		while (state.KeepRunning()) {
+			float* input = fp32.data();
+			benchmark::DoNotOptimize(input);
+
+			uint16_t* output = fp16.data();
+			const size_t n = state.range(0);
+			#if defined(__aarch64__)
+				const unsigned int fpcr = __builtin_aarch64_get_fpcr();
+				/* Disable flush-to-zero (FZ, bit 24) and enable Alternative FP16 format (AHP, bit 26) */
+				__builtin_aarch64_set_fpcr((fpcr & 0xFEFFFFFFu) | 0x04000000u);
+			#else
+				unsigned int fpscr;
+				__asm__ __volatile__ ("VMRS %[fpscr], fpscr" : [fpscr] "=r" (fpscr));
+				/* Disable flush-to-zero (FZ, bit 24) and enable Alternative FP16 format (AHP, bit 26) */
+				__asm__ __volatile__ ("VMSR fpscr, %[fpscr]" :
+					: [fpscr] "r" ((fpscr & 0xFEFFFFFFu) | 0x04000000u));
+			#endif
+			for (size_t i = 0; i < n; i += 4) {
+				vst1_u16(&output[i],
+					(uint16x4_t) vcvt_f16_f32(
+						vld1q_f32(&input[i])));
+			}
+			#if defined(__aarch64__)
+				__builtin_aarch64_set_fpcr(fpcr);  /* restore the control register saved above */
+			#else
+				__asm__ __volatile__ ("VMSR fpscr, %[fpscr]" :: [fpscr] "r" (fpscr));  /* restore saved FPSCR */
+			#endif
+
+			benchmark::DoNotOptimize(output);
+		}
+		state.SetItemsProcessed(int64_t(state.iterations()) * int64_t(state.range(0)));
+	}
+	BENCHMARK(hardware_vcvt_f16_f32)->RangeMultiplier(2)->Range(1<<10, 64<<20);
+#endif
+
+BENCHMARK_MAIN();
diff --git a/bench/to-ieee-array.cc b/bench/to-ieee-array.cc
new file mode 100644
index 0000000..a0c7f58
--- /dev/null
+++ b/bench/to-ieee-array.cc
@@ -0,0 +1,303 @@
+#include <benchmark/benchmark.h>
+
+#include <fp16.h>
+#ifndef EMSCRIPTEN
+	#include <fp16/psimd.h>
+#endif
+
+#include <vector>
+#include <random>
+#include <chrono>
+#include <functional>
+#include <algorithm>
+
+#if (defined(__i386__) || defined(__x86_64__)) && defined(__F16C__)
+	#include <immintrin.h>
+#endif
+
+#if defined(__ARM_NEON__) || defined(__aarch64__)
+	#include <arm_neon.h>
+#endif
+
+#ifdef FP16_COMPARATIVE_BENCHMARKS
+	#include <third-party/THHalf.h>
+	#include <third-party/npy-halffloat.h>
+	#include <third-party/eigen-half.h>
+	#include <third-party/float16-compressor.h>
+	#include <third-party/half.hpp>
+#endif
+
+
+/* Array benchmark: FP32 -> IEEE FP16 through the scalar fp16_ieee_from_fp32_value routine. */
+static void fp16_ieee_from_fp32_value(benchmark::State& state) {
+	const uint_fast32_t rng_seed = std::chrono::system_clock::now().time_since_epoch().count();
+	auto next_float = std::bind(std::uniform_real_distribution<float>(-1.0f, 1.0f), std::mt19937(rng_seed));
+	std::vector<float> fp32(state.range(0));
+	std::vector<uint16_t> fp16(state.range(0));
+	std::generate(fp32.begin(), fp32.end(), std::ref(next_float));
+
+	while (state.KeepRunning()) {
+		float* src = fp32.data();
+		benchmark::DoNotOptimize(src);
+
+		uint16_t* dst = fp16.data();
+		const size_t count = state.range(0);
+		for (size_t k = 0; k < count; ++k) {
+			dst[k] = fp16_ieee_from_fp32_value(src[k]);
+		}
+
+		benchmark::DoNotOptimize(dst);
+	}
+	state.SetItemsProcessed(static_cast<int64_t>(state.iterations()) * static_cast<int64_t>(state.range(0)));
+}
+BENCHMARK(fp16_ieee_from_fp32_value)->RangeMultiplier(2)->Range(1<<10, 64<<20);
+
+#if (defined(__i386__) || defined(__x86_64__)) && defined(__F16C__)
+	/* Array benchmark: FP32 -> FP16 with the 128-bit F16C intrinsic, 4 elements per step. */
+	static void hardware_mm_cvtps_ph(benchmark::State& state) {
+		const uint_fast32_t rng_seed = std::chrono::system_clock::now().time_since_epoch().count();
+		auto next_float = std::bind(std::uniform_real_distribution<float>(-1.0f, 1.0f), std::mt19937(rng_seed));
+		std::vector<float> fp32(state.range(0));
+		std::vector<uint16_t> fp16(state.range(0));
+		std::generate(fp32.begin(), fp32.end(), std::ref(next_float));
+
+		while (state.KeepRunning()) {
+			float* src = fp32.data();
+			benchmark::DoNotOptimize(src);
+
+			uint16_t* dst = fp16.data();
+			const size_t count = state.range(0);
+			for (size_t k = 0; k < count; k += 4) {
+				const __m128i halves = _mm_cvtps_ph(_mm_loadu_ps(&src[k]), _MM_FROUND_CUR_DIRECTION);
+				__m128i* out = static_cast<__m128i*>(static_cast<void*>(&dst[k]));
+				_mm_storel_epi64(out, halves);
+			}
+
+			benchmark::DoNotOptimize(dst);
+		}
+		state.SetItemsProcessed(static_cast<int64_t>(state.iterations()) * static_cast<int64_t>(state.range(0)));
+	}
+	BENCHMARK(hardware_mm_cvtps_ph)->RangeMultiplier(2)->Range(1<<10, 64<<20);
+
+	/* Array benchmark: FP32 -> FP16 with the 256-bit F16C intrinsic, 8 elements per step. */
+	static void hardware_mm256_cvtps_ph(benchmark::State& state) {
+		const uint_fast32_t rng_seed = std::chrono::system_clock::now().time_since_epoch().count();
+		auto next_float = std::bind(std::uniform_real_distribution<float>(-1.0f, 1.0f), std::mt19937(rng_seed));
+		std::vector<float> fp32(state.range(0));
+		std::vector<uint16_t> fp16(state.range(0));
+		std::generate(fp32.begin(), fp32.end(), std::ref(next_float));
+
+		while (state.KeepRunning()) {
+			float* src = fp32.data();
+			benchmark::DoNotOptimize(src);
+
+			uint16_t* dst = fp16.data();
+			const size_t count = state.range(0);
+			for (size_t k = 0; k < count; k += 8) {
+				const __m128i halves = _mm256_cvtps_ph(_mm256_loadu_ps(&src[k]), _MM_FROUND_CUR_DIRECTION);
+				__m128i* out = static_cast<__m128i*>(static_cast<void*>(&dst[k]));
+				_mm_storeu_si128(out, halves);
+			}
+
+			benchmark::DoNotOptimize(dst);
+		}
+		state.SetItemsProcessed(static_cast<int64_t>(state.iterations()) * static_cast<int64_t>(state.range(0)));
+	}
+	BENCHMARK(hardware_mm256_cvtps_ph)->RangeMultiplier(2)->Range(1<<10, 64<<20);
+#endif
+
+#if defined(__ARM_NEON_FP) && (__ARM_NEON_FP & 0x2) || defined(__aarch64__)
+	/* Array benchmark: FP32 -> IEEE FP16 with the NEON vcvt_f16_f32 intrinsic, 4 elements per step. */
+	static void hardware_vcvt_f16_f32(benchmark::State& state) {
+		const uint_fast32_t seed = std::chrono::system_clock::now().time_since_epoch().count();
+		auto rng = std::bind(std::uniform_real_distribution<float>(-1.0f, 1.0f), std::mt19937(seed));
+		std::vector<float> fp32(state.range(0));
+		std::vector<uint16_t> fp16(state.range(0));
+		std::generate(fp32.begin(), fp32.end(), std::ref(rng));
+
+		while (state.KeepRunning()) {
+			float* input = fp32.data();
+			benchmark::DoNotOptimize(input);
+
+			uint16_t* output = fp16.data();
+			const size_t n = state.range(0);
+			#if defined(__aarch64__)
+				const unsigned int fpcr = __builtin_aarch64_get_fpcr();
+				/* Disable flush-to-zero (FZ, bit 24) and Alternative FP16 format (AHP, bit 26) */
+				__builtin_aarch64_set_fpcr(fpcr & 0xFAFFFFFFu);
+			#else
+				unsigned int fpscr;
+				__asm__ __volatile__ ("VMRS %[fpscr], fpscr" : [fpscr] "=r" (fpscr));
+				/* Disable flush-to-zero (FZ, bit 24) and Alternative FP16 format (AHP, bit 26) */
+				__asm__ __volatile__ ("VMSR fpscr, %[fpscr]" :
+					: [fpscr] "r" (fpscr & 0xFAFFFFFFu));
+			#endif
+			for (size_t i = 0; i < n; i += 4) {
+				vst1_u16(&output[i],
+					(uint16x4_t) vcvt_f16_f32(
+						vld1q_f32(&input[i])));
+			}
+			#if defined(__aarch64__)
+				__builtin_aarch64_set_fpcr(fpcr);  /* restore the control register saved above */
+			#else
+				__asm__ __volatile__ ("VMSR fpscr, %[fpscr]" :: [fpscr] "r" (fpscr));  /* restore saved FPSCR */
+			#endif
+
+			benchmark::DoNotOptimize(output);
+		}
+		state.SetItemsProcessed(int64_t(state.iterations()) * int64_t(state.range(0)));
+	}
+	BENCHMARK(hardware_vcvt_f16_f32)->RangeMultiplier(2)->Range(1<<10, 64<<20);
+#endif
+
+#ifdef FP16_COMPARATIVE_BENCHMARKS
+	/* Array benchmark: FP32 -> FP16 through TH_float2halfbits, one element per call. */
+	static void TH_float2halfbits(benchmark::State& state) {
+		const uint_fast32_t rng_seed = std::chrono::system_clock::now().time_since_epoch().count();
+		auto next_float = std::bind(std::uniform_real_distribution<float>(-1.0f, 1.0f), std::mt19937(rng_seed));
+		std::vector<float> fp32(state.range(0));
+		std::vector<uint16_t> fp16(state.range(0));
+		std::generate(fp32.begin(), fp32.end(), std::ref(next_float));
+
+		while (state.KeepRunning()) {
+			float* src = fp32.data();
+			benchmark::DoNotOptimize(src);
+
+			uint16_t* dst = fp16.data();
+			const size_t count = state.range(0);
+			for (size_t k = 0; k < count; ++k) {
+				TH_float2halfbits(&src[k], &dst[k]);
+			}
+
+			benchmark::DoNotOptimize(dst);
+		}
+		state.SetItemsProcessed(static_cast<int64_t>(state.iterations()) * static_cast<int64_t>(state.range(0)));
+	}
+	BENCHMARK(TH_float2halfbits)->RangeMultiplier(2)->Range(1<<10, 64<<20);
+
+	/* Array benchmark: FP32 bits -> FP16 bits through npy_floatbits_to_halfbits, one element per call. */
+	static void npy_floatbits_to_halfbits(benchmark::State& state) {
+		const uint_fast32_t rng_seed = std::chrono::system_clock::now().time_since_epoch().count();
+		auto next_float = std::bind(std::uniform_real_distribution<float>(-1.0f, 1.0f), std::mt19937(rng_seed));
+		std::vector<float> fp32(state.range(0));
+		std::vector<uint16_t> fp16(state.range(0));
+		std::generate(fp32.begin(), fp32.end(), std::ref(next_float));
+
+		while (state.KeepRunning()) {
+			float* src = fp32.data();
+			benchmark::DoNotOptimize(src);
+
+			uint16_t* dst = fp16.data();
+			const size_t count = state.range(0);
+			for (size_t k = 0; k < count; ++k) {
+				dst[k] = npy_floatbits_to_halfbits(fp32_to_bits(src[k]));
+			}
+
+			benchmark::DoNotOptimize(dst);
+		}
+		state.SetItemsProcessed(static_cast<int64_t>(state.iterations()) * static_cast<int64_t>(state.range(0)));
+	}
+	BENCHMARK(npy_floatbits_to_halfbits)->RangeMultiplier(2)->Range(1<<10, 64<<20);
+
+	/* Array benchmark: FP32 -> FP16 through Eigen::half_impl::float_to_half_rtne, one element per call. */
+	static void Eigen_float_to_half_rtne(benchmark::State& state) {
+		const uint_fast32_t rng_seed = std::chrono::system_clock::now().time_since_epoch().count();
+		auto next_float = std::bind(std::uniform_real_distribution<float>(-1.0f, 1.0f), std::mt19937(rng_seed));
+		std::vector<float> fp32(state.range(0));
+		std::vector<uint16_t> fp16(state.range(0));
+		std::generate(fp32.begin(), fp32.end(), std::ref(next_float));
+
+		while (state.KeepRunning()) {
+			float* src = fp32.data();
+			benchmark::DoNotOptimize(src);
+
+			uint16_t* dst = fp16.data();
+			const size_t count = state.range(0);
+			for (size_t k = 0; k < count; ++k) {
+				dst[k] = Eigen::half_impl::float_to_half_rtne(src[k]).x;
+			}
+
+			benchmark::DoNotOptimize(dst);
+		}
+		state.SetItemsProcessed(static_cast<int64_t>(state.iterations()) * static_cast<int64_t>(state.range(0)));
+	}
+	BENCHMARK(Eigen_float_to_half_rtne)->RangeMultiplier(2)->Range(1<<10, 64<<20);
+
+	/* Array benchmark: FP32 -> FP16 through Float16Compressor::compress, one element per call. */
+	static void Float16Compressor_compress(benchmark::State& state) {
+		const uint_fast32_t rng_seed = std::chrono::system_clock::now().time_since_epoch().count();
+		auto next_float = std::bind(std::uniform_real_distribution<float>(-1.0f, 1.0f), std::mt19937(rng_seed));
+		std::vector<float> fp32(state.range(0));
+		std::vector<uint16_t> fp16(state.range(0));
+		std::generate(fp32.begin(), fp32.end(), std::ref(next_float));
+
+		while (state.KeepRunning()) {
+			float* src = fp32.data();
+			benchmark::DoNotOptimize(src);
+
+			uint16_t* dst = fp16.data();
+			const size_t count = state.range(0);
+			for (size_t k = 0; k < count; ++k) {
+				dst[k] = Float16Compressor::compress(src[k]);
+			}
+
+			benchmark::DoNotOptimize(dst);
+		}
+		state.SetItemsProcessed(static_cast<int64_t>(state.iterations()) * static_cast<int64_t>(state.range(0)));
+	}
+	BENCHMARK(Float16Compressor_compress)->RangeMultiplier(2)->Range(1<<10, 64<<20);
+
+	/* Array benchmark: FP32 -> FP16 through half_float's float2half_impl (round-to-nearest), true_type overload. */
+	static void half_float_detail_float2half_table(benchmark::State& state) {
+		const uint_fast32_t rng_seed = std::chrono::system_clock::now().time_since_epoch().count();
+		auto next_float = std::bind(std::uniform_real_distribution<float>(-1.0f, 1.0f), std::mt19937(rng_seed));
+		std::vector<float> fp32(state.range(0));
+		std::vector<uint16_t> fp16(state.range(0));
+		std::generate(fp32.begin(), fp32.end(), std::ref(next_float));
+
+		while (state.KeepRunning()) {
+			float* src = fp32.data();
+			benchmark::DoNotOptimize(src);
+
+			uint16_t* dst = fp16.data();
+			const size_t count = state.range(0);
+			for (size_t k = 0; k < count; ++k) {
+				/* The tag argument selects the overload under test. */
+				dst[k] = half_float::detail::float2half_impl<std::round_to_nearest>(
+					src[k], half_float::detail::true_type());
+			}
+
+			benchmark::DoNotOptimize(dst);
+		}
+		state.SetItemsProcessed(static_cast<int64_t>(state.iterations()) * static_cast<int64_t>(state.range(0)));
+	}
+	BENCHMARK(half_float_detail_float2half_table)->RangeMultiplier(2)->Range(1<<10, 64<<20);
+
+	/* Array benchmark: FP32 -> FP16 through half_float's float2half_impl (round-to-nearest), false_type overload. */
+	static void half_float_detail_float2half_branch(benchmark::State& state) {
+		const uint_fast32_t rng_seed = std::chrono::system_clock::now().time_since_epoch().count();
+		auto next_float = std::bind(std::uniform_real_distribution<float>(-1.0f, 1.0f), std::mt19937(rng_seed));
+		std::vector<float> fp32(state.range(0));
+		std::vector<uint16_t> fp16(state.range(0));
+		std::generate(fp32.begin(), fp32.end(), std::ref(next_float));
+
+		while (state.KeepRunning()) {
+			float* src = fp32.data();
+			benchmark::DoNotOptimize(src);
+
+			uint16_t* dst = fp16.data();
+			const size_t count = state.range(0);
+			for (size_t k = 0; k < count; ++k) {
+				/* The tag argument selects the overload under test. */
+				dst[k] = half_float::detail::float2half_impl<std::round_to_nearest>(
+					src[k], half_float::detail::false_type());
+			}
+
+			benchmark::DoNotOptimize(dst);
+		}
+		state.SetItemsProcessed(static_cast<int64_t>(state.iterations()) * static_cast<int64_t>(state.range(0)));
+	}
+	BENCHMARK(half_float_detail_float2half_branch)->RangeMultiplier(2)->Range(1<<10, 64<<20);
+#endif
+
+BENCHMARK_MAIN();
diff --git a/cmake/DownloadGoogleBenchmark.cmake b/cmake/DownloadGoogleBenchmark.cmake
new file mode 100644
index 0000000..59da7a6
--- /dev/null
+++ b/cmake/DownloadGoogleBenchmark.cmake
@@ -0,0 +1,15 @@
+cmake_minimum_required(VERSION 2.8.12 FATAL_ERROR)
+# Download-only helper project: no languages are enabled (NONE).
+project(googlebenchmark-download NONE)
+# Fetch the hash-checked archive into the Confu dependency dirs; every build step is left empty.
+include(ExternalProject)
+ExternalProject_Add(googlebenchmark
+	URL https://github.com/google/benchmark/archive/v1.2.0.zip
+	URL_HASH SHA256=cc463b28cb3701a35c0855fbcefb75b29068443f1952b64dd5f4f669272e95ea
+	SOURCE_DIR "${CONFU_DEPENDENCIES_SOURCE_DIR}/googlebenchmark"
+	BINARY_DIR "${CONFU_DEPENDENCIES_BINARY_DIR}/googlebenchmark"
+	CONFIGURE_COMMAND ""
+	BUILD_COMMAND ""
+	INSTALL_COMMAND ""
+	TEST_COMMAND ""
+)
diff --git a/cmake/DownloadGoogleTest.cmake b/cmake/DownloadGoogleTest.cmake
new file mode 100644
index 0000000..d69d19a
--- /dev/null
+++ b/cmake/DownloadGoogleTest.cmake
@@ -0,0 +1,15 @@
+cmake_minimum_required(VERSION 2.8.12 FATAL_ERROR)
+# Download-only helper project: no languages are enabled (NONE).
+project(googletest-download NONE)
+# Fetch the hash-checked archive into the Confu dependency dirs; every build step is left empty.
+include(ExternalProject)
+ExternalProject_Add(googletest
+	URL https://github.com/google/googletest/archive/release-1.8.0.zip
+	URL_HASH SHA256=f3ed3b58511efd272eb074a3a6d6fb79d7c2e6a0e374323d1e6bcbcc1ef141bf
+	SOURCE_DIR "${CONFU_DEPENDENCIES_SOURCE_DIR}/googletest"
+	BINARY_DIR "${CONFU_DEPENDENCIES_BINARY_DIR}/googletest"
+	CONFIGURE_COMMAND ""
+	BUILD_COMMAND ""
+	INSTALL_COMMAND ""
+	TEST_COMMAND ""
+)
diff --git a/cmake/DownloadPSimd.cmake b/cmake/DownloadPSimd.cmake
new file mode 100644
index 0000000..1095138
--- /dev/null
+++ b/cmake/DownloadPSimd.cmake
@@ -0,0 +1,15 @@
+cmake_minimum_required(VERSION 2.8.12 FATAL_ERROR)
+# Download-only helper project: no languages are enabled (NONE).
+project(psimd-download NONE)
+# Clone psimd into the Confu dependency dirs; every build step is left empty.
+include(ExternalProject)
+ExternalProject_Add(psimd
+	GIT_REPOSITORY https://github.com/Maratyszcza/psimd.git
+	GIT_TAG master  # NOTE(review): tracks a moving branch, not a pinned commit
+	SOURCE_DIR "${CONFU_DEPENDENCIES_SOURCE_DIR}/psimd"
+	BINARY_DIR "${CONFU_DEPENDENCIES_BINARY_DIR}/psimd"
+	CONFIGURE_COMMAND ""
+	BUILD_COMMAND ""
+	INSTALL_COMMAND ""
+	TEST_COMMAND ""
+)
diff --git a/configure.py b/configure.py
new file mode 100755
index 0000000..7e54453
--- /dev/null
+++ b/configure.py
@@ -0,0 +1,65 @@
+#!/usr/bin/env python
+
+
+import confu
+parser = confu.standard_parser("FP16 configuration script")  # module-level parser used by main()
+parser.add_argument("--compare", dest="compare", action="store_true",  # read in main() as options.compare
+    help="Enable performance comparison with other half-precision implementations")
+
+def main(args):
+    options = parser.parse_args(args)
+    build = confu.Build.from_options(options)
+
+    build.export_cpath("include", ["fp16.h"])
+
+    with build.options(source_dir="test", extra_include_dirs="test", deps=[build.deps.googletest, build.deps.psimd]):
+        fp16_tables = build.cxx("tables.cc")
+        build.unittest("ieee-to-fp32-bits",
+            [build.cxx("ieee-to-fp32-bits.cc"), fp16_tables])
+        build.unittest("ieee-to-fp32-value",
+            [build.cxx("ieee-to-fp32-value.cc"), fp16_tables])
+        build.unittest("ieee-from-fp32-value",
+            [build.cxx("ieee-from-fp32-value.cc"), fp16_tables])
+
+        build.unittest("alt-to-fp32-bits",
+            [build.cxx("alt-to-fp32-bits.cc"), fp16_tables])
+        build.unittest("alt-to-fp32-value",
+            [build.cxx("alt-to-fp32-value.cc"), fp16_tables])
+        build.unittest("alt-from-fp32-value",
+            [build.cxx("alt-from-fp32-value.cc"), fp16_tables])
+
+        if build.target.is_x86_64:
+            stubs = build.peachpy("peachpy/stubs.py")
+            build.unittest("alt-xmm-to-fp32-ymm-avx", [build.cxx("peachpy/alt-xmm-to-fp32-xmm-avx.cc"), stubs])
+            build.unittest("alt-xmm-to-fp32-ymm-avx2", [build.cxx("peachpy/alt-xmm-to-fp32-ymm-avx2.cc"), stubs])
+
+        if not build.target.is_emscripten:
+            build.unittest("ieee-to-fp32-psimd", build.cxx("ieee-to-fp32-psimd.cc"))
+            build.unittest("alt-to-fp32-psimd", build.cxx("alt-to-fp32-psimd.cc"))
+
+            build.unittest("ieee-to-fp32x2-psimd", build.cxx("ieee-to-fp32x2-psimd.cc"))
+            build.unittest("alt-to-fp32x2-psimd", build.cxx("alt-to-fp32x2-psimd.cc"))
+
+        build.unittest("bitcasts", build.cxx("bitcasts.cc"))
+
+    macros = ["BENCHMARK_HAS_NO_INLINE_ASSEMBLY"]
+    if options.compare:
+        macros.append("FP16_COMPARATIVE_BENCHMARKS")
+    with build.options(source_dir="bench", extra_include_dirs=".", macros=macros,
+            deps=[build.deps.googlebenchmark, build.deps.psimd]):
+
+        build.benchmark("ieee-element-bench", build.cxx("ieee-element.cc"))
+        build.benchmark("alt-element-bench", build.cxx("alt-element.cc"))
+
+        build.benchmark("from-ieee-array-bench", build.cxx("from-ieee-array.cc"))
+        build.benchmark("from-alt-array-bench", build.cxx("from-alt-array.cc"))
+
+        build.benchmark("to-ieee-array-bench", build.cxx("to-ieee-array.cc"))
+        build.benchmark("to-alt-array-bench", build.cxx("to-alt-array.cc"))
+
+    return build
+
+
+if __name__ == "__main__":  # run as a script: configure from the command-line arguments, then generate the build
+    import sys
+    main(sys.argv[1:]).generate()
diff --git a/confu.yaml b/confu.yaml
new file mode 100644
index 0000000..75f3fc9
--- /dev/null
+++ b/confu.yaml
@@ -0,0 +1,8 @@
+name: FP16
+title: half-precision floating-point conversion
+license: MIT
+deps:
+  - name: psimd
+    url:  https://github.com/Maratyszcza/psimd.git
+  - name: googletest
+  - name: googlebenchmark
\ No newline at end of file
diff --git a/include/fp16.h b/include/fp16.h
new file mode 100644
index 0000000..9d7366e
--- /dev/null
+++ b/include/fp16.h
@@ -0,0 +1,11 @@
+#pragma once
+#ifndef FP16_H
+#define FP16_H
+
+#include <fp16/fp16.h>
+
+#if defined(PSIMD_H)
+#include <fp16/psimd.h>
+#endif
+
+#endif /* FP16_H */
diff --git a/include/fp16/__init__.py b/include/fp16/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/include/fp16/__init__.py
diff --git a/include/fp16/avx.py b/include/fp16/avx.py
new file mode 100644
index 0000000..6d1074a
--- /dev/null
+++ b/include/fp16/avx.py
@@ -0,0 +1,50 @@
+from peachpy import *
+from peachpy.x86_64 import *
+
+
+def fp16_alt_xmm_to_fp32_xmm(xmm_half):  # emits the conversion instructions; returns a new XMM register with the result
+	xmm_zero = XMMRegister()
+	VPXOR(xmm_zero, xmm_zero, xmm_zero)  # all-zero register
+
+	xmm_word = XMMRegister()
+	VPUNPCKLWD(xmm_word, xmm_zero, xmm_half)  # low four halfwords, each in the upper 16 bits of a 32-bit lane
+
+	xmm_shl1_half = XMMRegister()
+	VPADDW(xmm_shl1_half, xmm_half, xmm_half)  # 2 * half per 16-bit lane: the sign bit is shifted out
+
+	xmm_shl1_nonsign = XMMRegister()
+	VPADDD(xmm_shl1_nonsign, xmm_word, xmm_word)  # 2 * word per 32-bit lane: the sign bit is shifted out
+
+	sign_mask = Constant.float32x4(-0.0)  # only the sign bit of each 32-bit lane is set
+
+	xmm_sign = XMMRegister()
+	VANDPS(xmm_sign, xmm_word, sign_mask)  # isolated sign bits
+
+	xmm_shr3_nonsign = XMMRegister()
+	VPSRLD(xmm_shr3_nonsign, xmm_shl1_nonsign, 4)  # shifted left by 1 above, right by 4 here: net shift right by 3
+
+	exp_offset = Constant.uint32x4(0x38000000)  # 0x70 << 23
+
+	xmm_norm_nonsign = XMMRegister()
+	VPADDD(xmm_norm_nonsign, xmm_shr3_nonsign, exp_offset)  # result used for inputs with a non-zero exponent field
+
+	magic_mask = Constant.uint16x8(0x3E80)  # upper 16 bits of 0x3E800000, the bit pattern of 0.25f
+	xmm_denorm_nonsign = XMMRegister()
+	VPUNPCKLWD(xmm_denorm_nonsign, xmm_shl1_half, magic_mask)  # 32-bit lanes of (0x3E80 << 16) | (2 * half)
+
+	magic_bias = Constant.float32x4(0.25)
+	VSUBPS(xmm_denorm_nonsign, xmm_denorm_nonsign, magic_bias)  # result used for inputs with a zero exponent field
+
+	xmm_denorm_cutoff = XMMRegister()
+	VMOVDQA(xmm_denorm_cutoff, Constant.uint32x4(0x00800000))  # 1 << 23
+	
+	xmm_denorm_mask = XMMRegister()
+	VPCMPGTD(xmm_denorm_mask, xmm_denorm_cutoff, xmm_shr3_nonsign)  # all-ones lanes where shr3_nonsign < cutoff
+
+	xmm_nonsign = XMMRegister()
+	VBLENDVPS(xmm_nonsign, xmm_norm_nonsign, xmm_denorm_nonsign, xmm_denorm_mask)  # denorm result where the mask is set
+
+	xmm_float = XMMRegister()
+	VORPS(xmm_float, xmm_nonsign, xmm_sign)  # re-attach the sign bits
+
+	return xmm_float
diff --git a/include/fp16/avx2.py b/include/fp16/avx2.py
new file mode 100644
index 0000000..b0653e3
--- /dev/null
+++ b/include/fp16/avx2.py
@@ -0,0 +1,53 @@
+from peachpy import *
+from peachpy.x86_64 import *
+
+
+def fp16_alt_xmm_to_fp32_ymm(xmm_half):  # emits the conversion instructions; returns a new YMM register with the result
+	ymm_half = YMMRegister()
+	VPERMQ(ymm_half, xmm_half.as_ymm, 0b01010000)  # qwords [0, 0, 1, 1]: halfwords 0-3 in the low lane, 4-7 in the high lane
+
+	ymm_zero = YMMRegister()
+	VPXOR(ymm_zero.as_xmm, ymm_zero.as_xmm, ymm_zero.as_xmm)  # zeroed through the XMM view (VEX 128-bit ops clear the upper lane)
+
+	ymm_word = YMMRegister()
+	VPUNPCKLWD(ymm_word, ymm_zero, ymm_half)  # per 128-bit lane: low four halfwords, each in the upper 16 bits of a 32-bit lane
+
+	ymm_shl1_half = YMMRegister()
+	VPADDW(ymm_shl1_half, ymm_half, ymm_half)  # 2 * half per 16-bit lane: the sign bit is shifted out
+
+	ymm_shl1_nonsign = YMMRegister()
+	VPADDD(ymm_shl1_nonsign, ymm_word, ymm_word)  # 2 * word per 32-bit lane: the sign bit is shifted out
+
+	sign_mask = Constant.float32x8(-0.0)  # only the sign bit of each 32-bit lane is set
+
+	ymm_sign = YMMRegister()
+	VANDPS(ymm_sign, ymm_word, sign_mask)  # isolated sign bits
+
+	ymm_shr3_nonsign = YMMRegister()
+	VPSRLD(ymm_shr3_nonsign, ymm_shl1_nonsign, 4)  # shifted left by 1 above, right by 4 here: net shift right by 3
+
+	exp_offset = Constant.uint32x8(0x38000000)  # 0x70 << 23
+
+	ymm_norm_nonsign = YMMRegister()
+	VPADDD(ymm_norm_nonsign, ymm_shr3_nonsign, exp_offset)  # result used for inputs with a non-zero exponent field
+
+	magic_mask = Constant.uint16x16(0x3E80)  # upper 16 bits of 0x3E800000, the bit pattern of 0.25f
+	ymm_denorm_nonsign = YMMRegister()
+	VPUNPCKLWD(ymm_denorm_nonsign, ymm_shl1_half, magic_mask)  # 32-bit lanes of (0x3E80 << 16) | (2 * half)
+
+	magic_bias = Constant.float32x8(0.25)
+	VSUBPS(ymm_denorm_nonsign, ymm_denorm_nonsign, magic_bias)  # result used for inputs with a zero exponent field
+
+	ymm_denorm_cutoff = YMMRegister()
+	VMOVDQA(ymm_denorm_cutoff, Constant.uint32x8(0x00800000))  # 1 << 23
+	
+	ymm_denorm_mask = YMMRegister()
+	VPCMPGTD(ymm_denorm_mask, ymm_denorm_cutoff, ymm_shr3_nonsign)  # all-ones lanes where shr3_nonsign < cutoff
+
+	ymm_nonsign = YMMRegister()
+	VBLENDVPS(ymm_nonsign, ymm_norm_nonsign, ymm_denorm_nonsign, ymm_denorm_mask)  # denorm result where the mask is set
+
+	ymm_float = YMMRegister()
+	VORPS(ymm_float, ymm_nonsign, ymm_sign)  # re-attach the sign bits
+
+	return ymm_float
diff --git a/include/fp16/bitcasts.h b/include/fp16/bitcasts.h
new file mode 100644
index 0000000..26a755c
--- /dev/null
+++ b/include/fp16/bitcasts.h
@@ -0,0 +1,76 @@
+#pragma once
+#ifndef FP16_BITCASTS_H
+#define FP16_BITCASTS_H
+
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+	#include <cstdint>
+#elif !defined(__OPENCL_VERSION__)
+	#include <stdint.h>
+#endif
+
+
+static inline float fp32_from_bits(uint32_t w) { /* returns the float whose bit representation is w */
+#if defined(__OPENCL_VERSION__)
+	return as_float(w);
+#elif defined(__CUDA_ARCH__)
+	return __uint_as_float((unsigned int) w);
+#elif defined(__INTEL_COMPILER)
+	return _castu32_f32(w);
+#else /* fallback: store through one union member, load through the other */
+	union {
+		uint32_t as_bits;
+		float as_value;
+	} fp32 = { w }; /* initializes the first member, as_bits */
+	return fp32.as_value;
+#endif
+}
+
+static inline uint32_t fp32_to_bits(float f) { /* returns the bit representation of f as a 32-bit integer */
+#if defined(__OPENCL_VERSION__)
+	return as_uint(f);
+#elif defined(__CUDA_ARCH__)
+	return (uint32_t) __float_as_uint(f);
+#elif defined(__INTEL_COMPILER)
+	return _castf32_u32(f);
+#else /* fallback: store through one union member, load through the other */
+	union {
+		float as_value;
+		uint32_t as_bits;
+	} fp32 = { f }; /* initializes the first member, as_value */
+	return fp32.as_bits;
+#endif
+}
+
+static inline double fp64_from_bits(uint64_t w) { /* returns the double whose bit representation is w */
+#if defined(__OPENCL_VERSION__)
+	return as_double(w);
+#elif defined(__CUDA_ARCH__)
+	return __longlong_as_double((long long) w);
+#elif defined(__INTEL_COMPILER)
+	return _castu64_f64(w);
+#else /* fallback: store through one union member, load through the other */
+	union {
+		uint64_t as_bits;
+		double as_value;
+	} fp64 = { w }; /* initializes the first member, as_bits */
+	return fp64.as_value;
+#endif
+}
+
+static inline uint64_t fp64_to_bits(double f) { /* returns the bit representation of f as a 64-bit integer */
+#if defined(__OPENCL_VERSION__)
+	return as_ulong(f);
+#elif defined(__CUDA_ARCH__)
+	return (uint64_t) __double_as_longlong(f);
+#elif defined(__INTEL_COMPILER)
+	return _castf64_u64(f);
+#else /* fallback: store through one union member, load through the other */
+	union {
+		double as_value;
+		uint64_t as_bits;
+	} fp64 = { f }; /* initializes the first member, as_value */
+	return fp64.as_bits;
+#endif
+}
+
+#endif /* FP16_BITCASTS_H */
diff --git a/include/fp16/fp16.h b/include/fp16/fp16.h
new file mode 100644
index 0000000..43a893a
--- /dev/null
+++ b/include/fp16/fp16.h
@@ -0,0 +1,451 @@
+#pragma once
+#ifndef FP16_FP16_H
+#define FP16_FP16_H
+
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+	#include <cstdint>
+	#include <cmath>
+#elif !defined(__OPENCL_VERSION__)
+	#include <stdint.h>
+	#include <math.h>
+#endif
+
+#ifdef _MSC_VER
+	#include <intrin.h>
+#endif
+
+#include <fp16/bitcasts.h>
+
+
+/*
+ * Convert a 16-bit floating-point number in IEEE half-precision format, in bit representation, to
+ * a 32-bit floating-point number in IEEE single-precision format, in bit representation.
+ *
+ * @note The implementation doesn't use any floating-point operations.
+ */
+static inline uint32_t fp16_ieee_to_fp32_bits(uint16_t h) {
+	/*
+	 * Extend the half-precision floating-point number to 32 bits and shift to the upper part of the 32-bit word:
+	 *      +---+-----+------------+-------------------+
+	 *      | S |EEEEE|MM MMMM MMMM|0000 0000 0000 0000|
+	 *      +---+-----+------------+-------------------+
+	 * Bits  31  26-30    16-25            0-15
+	 *
+	 * S - sign bit, E - bits of the biased exponent, M - bits of the mantissa, 0 - zero bits.
+	 */
+	const uint32_t w = (uint32_t) h << 16;
+	/*
+	 * Extract the sign of the input number into the high bit of the 32-bit word:
+	 *
+	 *      +---+----------------------------------+
+	 *      | S |0000000 00000000 00000000 00000000|
+	 *      +---+----------------------------------+
+	 * Bits  31                 0-30
+	 */
+	const uint32_t sign = w & UINT32_C(0x80000000);
+	/*
+	 * Extract mantissa and biased exponent of the input number into the bits 0-30 of the 32-bit word:
+	 *
+	 *      +---+-----+------------+-------------------+
+	 *      | 0 |EEEEE|MM MMMM MMMM|0000 0000 0000 0000|
+	 *      +---+-----+------------+-------------------+
+	 * Bits  31  26-30    16-25            0-15
+	 */
+	const uint32_t nonsign = w & UINT32_C(0x7FFFFFFF);
+	/*
+	 * Renorm shift is the number of bits to shift mantissa left to make the half-precision number normalized.
+	 * If the initial number is normalized, some of its high 6 bits (sign == 0 and 5-bit exponent) equals one,
+	 * and renorm_shift == 0. If the number is denormalized, renorm_shift > 0: shifting nonsign left by renorm_shift
+	 * moves the unit bit of mantissa into exponent, turning the biased exponent into 1 and normalizing mantissa.
+	 * Bit 0 of nonsign is always zero; it is set before counting because a leading-zero count of 0 is undefined.
+	 */
+#ifdef _MSC_VER
+	unsigned long nonsign_bsr;
+	_BitScanReverse(&nonsign_bsr, (unsigned long) nonsign | 1);
+	uint32_t renorm_shift = (uint32_t) nonsign_bsr ^ 31;
+#else
+	uint32_t renorm_shift = __builtin_clz(nonsign | 1);
+#endif
+	renorm_shift = renorm_shift > 5 ? renorm_shift - 5 : 0;
+	/*
+	 * Iff half-precision number has the maximum biased exponent (0x1F), the addition overflows it into bit 31,
+	 * and the subsequent shift turns the high 9 bits into 1. Thus
+	 *   inf_nan_mask ==
+	 *                   0x7F800000 if the half-precision number had the maximum exponent (i.e. was NaN or infinity)
+	 *                   0x00000000 otherwise
+	 */
+	const int32_t inf_nan_mask = ((int32_t) (nonsign + 0x04000000) >> 8) & INT32_C(0x7F800000);
+	/*
+	 * Iff nonsign is 0, it overflows into 0xFFFFFFFF, turning bit 31 into 1. Otherwise, bit 31 remains 0.
+	 * The signed shift right by 31 broadcasts bit 31 into all bits of the zero_mask. Thus
+	 *   zero_mask ==
+	 *                0xFFFFFFFF if the half-precision number was zero (+0.0h or -0.0h)
+	 *                0x00000000 otherwise
+	 */
+	const int32_t zero_mask = (int32_t) (nonsign - 1) >> 31;
+	/*
+	 * 1. Shift nonsign left by renorm_shift to normalize it (if the input was denormal)
+	 * 2. Shift nonsign right by 3 so the exponent (5 bits originally) becomes an 8-bit field and 10-bit mantissa
+	 *    shifts into the 10 high bits of the 23-bit mantissa of IEEE single-precision number.
+	 * 3. Add 0x70 to the exponent (starting at bit 23) to compensate the difference in exponent bias
+	 *    (0x7F for single-precision number less 0xF for half-precision number).
+	 * 4. Subtract renorm_shift from the exponent (starting at bit 23) to account for renormalization. As renorm_shift
+	 *    is less than 0x70, this can be combined with step 3.
+	 * 5. Binary OR with inf_nan_mask to turn the exponent into 0xFF if the input was NaN or infinity.
+	 * 6. Binary ANDNOT with zero_mask to turn the mantissa and exponent into zero if the input was zero.
+	 * 7. Combine with the sign of the input number.
+	 */
+	return sign | ((((nonsign << renorm_shift >> 3) + ((0x70 - renorm_shift) << 23)) | inf_nan_mask) & ~zero_mask);
+}
+
+/*
+ * Convert a 16-bit floating-point number in IEEE half-precision format, in bit representation, to
+ * a 32-bit floating-point number in IEEE single-precision format.
+ *
+ * @note The implementation relies on IEEE-like (no assumption about rounding mode and no operations on denormals)
+ * floating-point operations and bitcasts between integer and floating-point variables.
+ */
+static inline float fp16_ieee_to_fp32_value(uint16_t h) {
+	/*
+	 * Extend the half-precision floating-point number to 32 bits and shift to the upper part of the 32-bit word:
+	 *      +---+-----+------------+-------------------+
+	 *      | S |EEEEE|MM MMMM MMMM|0000 0000 0000 0000|
+	 *      +---+-----+------------+-------------------+
+	 * Bits  31  26-30    16-25            0-15
+	 *
+	 * S - sign bit, E - bits of the biased exponent, M - bits of the mantissa, 0 - zero bits.
+	 */
+	const uint32_t w = (uint32_t) h << 16;
+	/*
+	 * Extract the sign of the input number into the high bit of the 32-bit word:
+	 *
+	 *      +---+----------------------------------+
+	 *      | S |0000000 00000000 00000000 00000000|
+	 *      +---+----------------------------------+
+	 * Bits  31                 0-30
+	 */
+	const uint32_t sign = w & UINT32_C(0x80000000);
+	/*
+	 * Extract mantissa and biased exponent of the input number into the high bits of the 32-bit word:
+	 *
+	 *      +-----+------------+---------------------+
+	 *      |EEEEE|MM MMMM MMMM|0 0000 0000 0000 0000|
+	 *      +-----+------------+---------------------+
+	 * Bits  27-31    17-26            0-16
+	 */
+	const uint32_t two_w = w + w;
+
+	/*
+	 * Shift exponent and mantissa into bits 23-27 and bits 13-22 so they become exponent and mantissa
+	 * of a single-precision floating-point number:
+	 *
+	 *       S|Exponent |          Mantissa
+	 *      +-+---+-----+------------+----------------+
+	 *      |0|000|EEEEE|MM MMMM MMMM|0 0000 0000 0000|
+	 *      +-+---+-----+------------+----------------+
+	 * Bits   | 23-31   |           0-22
+	 *
+	 * Next, there are some adjustments to the exponent:
+	 * - The exponent needs to be corrected by the difference in exponent bias between single-precision and half-precision
+	 *   formats (0x7F - 0xF = 0x70)
+	 * - Inf and NaN values in the inputs should become Inf and NaN values after conversion to the single-precision number.
+	 *   Therefore, if the biased exponent of the half-precision input was 0x1F (max possible value), the biased exponent
+	 *   of the single-precision output must be 0xFF (max possible value). We do this correction in two steps:
+	 *   - First, we adjust the exponent by (0xFF - 0x1F) = 0xE0 (see exp_offset below) rather than by 0x70 suggested
+	 *     by the difference in the exponent bias (see above).
+	 *   - Then we multiply the single-precision result of exponent adjustment by 2**(-112) to reverse the effect of
+	 *     exponent adjustment by 0xE0 less the necessary exponent adjustment by 0x70 due to difference in exponent bias.
+	 *     The floating-point multiplication hardware would ensure that Inf and NaN would retain their value on at least
+	 *     partially IEEE754-compliant implementations.
+	 *
+	 * Note that the above operations do not handle denormal inputs (where biased exponent == 0). However, they also do not
+	 * operate on denormal inputs, and do not produce denormal results.
+	 */
+	const uint32_t exp_offset = UINT32_C(0xE0) << 23;
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)
+	const float exp_scale = 0x1.0p-112f;
+#else
+	const float exp_scale = fp32_from_bits(UINT32_C(0x7800000)); /* bit pattern of 2**(-112): biased exponent 0xF */
+#endif
+	const float normalized_value = fp32_from_bits((two_w >> 4) + exp_offset) * exp_scale;
+
+	/*
+	 * Convert denormalized half-precision inputs into single-precision results (always normalized).
+	 * Zero inputs are also handled here.
+	 *
+	 * In a denormalized number the biased exponent is zero, and mantissa has non-zero bits.
+	 * First, we shift mantissa into bits 0-9 of the 32-bit word.
+	 *
+	 *                  zeros           |  mantissa
+	 *      +---------------------------+------------+
+	 *      |0000 0000 0000 0000 0000 00|MM MMMM MMMM|
+	 *      +---------------------------+------------+
+	 * Bits             10-31                0-9
+	 *
+	 * Now, remember that denormalized half-precision numbers are represented as:
+	 *    FP16 = mantissa * 2**(-24).
+	 * The trick is to construct a normalized single-precision number with the same mantissa as the half-precision input
+	 * and with an exponent which would scale the corresponding mantissa bits to 2**(-24).
+	 * A normalized single-precision floating-point number is represented as:
+	 *    FP32 = (1 + mantissa * 2**(-23)) * 2**(exponent - 127)
+	 * Therefore, when the biased exponent is 126, a unit change in the mantissa of the input denormalized half-precision
+	 * number causes a change of the constructed single-precision number by 2**(-24), i.e. the same amount.
+	 *
+	 * The last step is to adjust the bias of the constructed single-precision number. When the input half-precision number
+	 * is zero, the constructed single-precision number has the value of
+	 *    FP32 = 1 * 2**(126 - 127) = 2**(-1) = 0.5
+	 * Therefore, we need to subtract 0.5 from the constructed single-precision number to get the numerical equivalent of
+	 * the input half-precision number.
+	 */
+	const uint32_t magic_mask = UINT32_C(126) << 23;
+	const float magic_bias = 0.5f;
+	const float denormalized_value = fp32_from_bits((two_w >> 17) | magic_mask) - magic_bias;
+
+	/*
+	 * - Choose either results of conversion of input as a normalized number, or as a denormalized number, depending on the
+	 *   input exponent. The variable two_w contains input exponent in bits 27-31, therefore if it is smaller than 2**27,
+	 *   the input is either a denormal number, or zero.
+	 * - Combine the result of conversion of exponent and mantissa with the sign of the input number.
+	 */
+	const uint32_t denormalized_cutoff = UINT32_C(1) << 27;
+	const uint32_t result = sign |
+		(two_w < denormalized_cutoff ? fp32_to_bits(denormalized_value) : fp32_to_bits(normalized_value));
+	return fp32_from_bits(result);
+}
+
+/*
+ * Convert a 32-bit floating-point number in IEEE single-precision format to a 16-bit floating-point number in
+ * IEEE half-precision format, in bit representation.
+ *
+ * @note The implementation relies on IEEE-like (no assumption about rounding mode and no operations on denormals)
+ * floating-point operations and bitcasts between integer and floating-point variables.
+ */
+static inline uint16_t fp16_ieee_from_fp32_value(float f) {
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)
+	const float scale_to_inf = 0x1.0p+112f;
+	const float scale_to_zero = 0x1.0p-110f;
+#else
+	const float scale_to_inf = fp32_from_bits(UINT32_C(0x77800000)); /* bit pattern of 2**112 */
+	const float scale_to_zero = fp32_from_bits(UINT32_C(0x08800000)); /* bit pattern of 2**(-110) */
+#endif
+	float base = (fabsf(f) * scale_to_inf) * scale_to_zero; /* |f| * 2**2, unless the first product overflows to infinity */
+
+	const uint32_t w = fp32_to_bits(f);
+	const uint32_t shl1_w = w + w; /* bits of f shifted left by one: the sign bit is dropped */
+	const uint32_t sign = w & UINT32_C(0x80000000);
+	uint32_t bias = shl1_w & UINT32_C(0xFF000000); /* biased exponent of f, held in bits 24-31 */
+	if (bias < UINT32_C(0x71000000)) {
+		bias = UINT32_C(0x71000000); /* clamp from below to biased exponent 0x71, i.e. 2**(-14) */
+	}
+
+	base = fp32_from_bits((bias >> 1) + UINT32_C(0x07800000)) + base; /* add a power of two: clamped exponent + 0xF */
+	const uint32_t bits = fp32_to_bits(base);
+	const uint32_t exp_bits = (bits >> 13) & UINT32_C(0x00007C00); /* 5 bits, moved to bits 10-14 (half exponent field) */
+	const uint32_t mantissa_bits = bits & UINT32_C(0x00000FFF); /* low 12 bits of the sum */
+	const uint32_t nonsign = exp_bits + mantissa_bits; /* addition: bits 10-11 of mantissa_bits carry into the exponent */
+	return (sign >> 16) | (shl1_w > UINT32_C(0xFF000000) ? UINT16_C(0x7E00) : nonsign); /* NaN inputs map to 0x7E00 */
+}
+
+/*
+ * Convert a 16-bit floating-point number in ARM alternative half-precision format, in bit representation, to
+ * a 32-bit floating-point number in IEEE single-precision format, in bit representation.
+ *
+ * @note The implementation doesn't use any floating-point operations.
+ */
+static inline uint32_t fp16_alt_to_fp32_bits(uint16_t h) {
+	/*
+	 * Extend the half-precision floating-point number to 32 bits and shift to the upper part of the 32-bit word:
+	 *      +---+-----+------------+-------------------+
+	 *      | S |EEEEE|MM MMMM MMMM|0000 0000 0000 0000|
+	 *      +---+-----+------------+-------------------+
+	 * Bits  31  26-30    16-25            0-15
+	 *
+	 * S - sign bit, E - bits of the biased exponent, M - bits of the mantissa, 0 - zero bits.
+	 */
+	const uint32_t w = (uint32_t) h << 16;
+	/*
+	 * Extract the sign of the input number into the high bit of the 32-bit word:
+	 *
+	 *      +---+----------------------------------+
+	 *      | S |0000000 00000000 00000000 00000000|
+	 *      +---+----------------------------------+
+	 * Bits  31                 0-30
+	 */
+	const uint32_t sign = w & UINT32_C(0x80000000);
+	/*
+	 * Extract mantissa and biased exponent of the input number into the bits 0-30 of the 32-bit word:
+	 *
+	 *      +---+-----+------------+-------------------+
+	 *      | 0 |EEEEE|MM MMMM MMMM|0000 0000 0000 0000|
+	 *      +---+-----+------------+-------------------+
+	 * Bits  31  26-30    16-25            0-15
+	 */
+	const uint32_t nonsign = w & UINT32_C(0x7FFFFFFF);
+	/*
+	 * Renorm shift is the number of bits to shift mantissa left to make the half-precision number normalized.
+	 * If the initial number is normalized, some of its high 6 bits (sign == 0 and 5-bit exponent) equals one,
+	 * and renorm_shift == 0. If the number is denormalized, renorm_shift > 0: shifting nonsign left by renorm_shift
+	 * moves the unit bit of mantissa into exponent, turning the biased exponent into 1 and normalizing mantissa.
+	 * Bit 0 of nonsign is always zero; it is set before counting because a leading-zero count of 0 is undefined.
+	 */
+#ifdef _MSC_VER
+	unsigned long nonsign_bsr;
+	_BitScanReverse(&nonsign_bsr, (unsigned long) nonsign | 1);
+	uint32_t renorm_shift = (uint32_t) nonsign_bsr ^ 31;
+#else
+	uint32_t renorm_shift = __builtin_clz(nonsign | 1);
+#endif
+	renorm_shift = renorm_shift > 5 ? renorm_shift - 5 : 0;
+	/*
+	 * Iff nonsign is 0, it overflows into 0xFFFFFFFF, turning bit 31 into 1. Otherwise, bit 31 remains 0.
+	 * The signed shift right by 31 broadcasts bit 31 into all bits of the zero_mask. Thus
+	 *   zero_mask ==
+	 *                0xFFFFFFFF if the half-precision number was zero (+0.0h or -0.0h)
+	 *                0x00000000 otherwise
+	 */
+	const int32_t zero_mask = (int32_t) (nonsign - 1) >> 31;
+	/*
+	 * 1. Shift nonsign left by renorm_shift to normalize it (if the input was denormal)
+	 * 2. Shift nonsign right by 3 so the exponent (5 bits originally) becomes an 8-bit field and 10-bit mantissa
+	 *    shifts into the 10 high bits of the 23-bit mantissa of IEEE single-precision number.
+	 * 3. Add 0x70 to the exponent (starting at bit 23) to compensate the difference in exponent bias
+	 *    (0x7F for single-precision number less 0xF for half-precision number).
+	 * 4. Subtract renorm_shift from the exponent (starting at bit 23) to account for renormalization. As renorm_shift
+	 *    is less than 0x70, this can be combined with step 3.
+	 * 5. Binary ANDNOT with zero_mask to turn the mantissa and exponent into zero if the input was zero.
+	 * 6. Combine with the sign of the input number.
+	 */
+	return sign | (((nonsign << renorm_shift >> 3) + ((0x70 - renorm_shift) << 23)) & ~zero_mask);
+}
+
+/*
+ * Convert a 16-bit floating-point number in ARM alternative half-precision format, in bit representation, to
+ * a 32-bit floating-point number in IEEE single-precision format.
+ *
+ * @note The implementation relies on IEEE-like (no assumption about rounding mode and no operations on denormals)
+ * floating-point operations and bitcasts between integer and floating-point variables.
+ */
+static inline float fp16_alt_to_fp32_value(uint16_t h) {
+	/*
+	 * Extend the half-precision floating-point number to 32 bits and shift to the upper part of the 32-bit word:
+	 *      +---+-----+------------+-------------------+
+	 *      | S |EEEEE|MM MMMM MMMM|0000 0000 0000 0000|
+	 *      +---+-----+------------+-------------------+
+	 * Bits  31  26-30    16-25            0-15
+	 *
+	 * S - sign bit, E - bits of the biased exponent, M - bits of the mantissa, 0 - zero bits.
+	 */
+	const uint32_t w = (uint32_t) h << 16;
+	/*
+	 * Extract the sign of the input number into the high bit of the 32-bit word:
+	 *
+	 *      +---+----------------------------------+
+	 *      | S |0000000 00000000 00000000 00000000|
+	 *      +---+----------------------------------+
+	 * Bits  31                 0-30
+	 */
+	const uint32_t sign = w & UINT32_C(0x80000000);
+	/*
+	 * Extract mantissa and biased exponent of the input number into the high bits of the 32-bit word:
+	 *
+	 *      +-----+------------+---------------------+
+	 *      |EEEEE|MM MMMM MMMM|0 0000 0000 0000 0000|
+	 *      +-----+------------+---------------------+
+	 * Bits  27-31    17-26            0-16
+	 */
+	const uint32_t two_w = w + w;
+
+	/*
+	 * Shift exponent and mantissa into bits 23-27 and bits 13-22 so they become exponent and mantissa
+	 * of a single-precision floating-point number:
+	 *
+	 *       S|Exponent |          Mantissa
+	 *      +-+---+-----+------------+----------------+
+	 *      |0|000|EEEEE|MM MMMM MMMM|0 0000 0000 0000|
+	 *      +-+---+-----+------------+----------------+
+	 * Bits   | 23-31   |           0-22
+	 *
+	 * Next, the exponent is adjusted for the difference in exponent bias between single-precision and half-precision
+	 * formats (0x7F - 0xF = 0x70). This operation never overflows or generates non-finite values, as the largest
+	 * half-precision exponent is 0x1F and after the adjustment it can not exceed 0x8F < 0xFE (largest single-precision
+	 * exponent for finite values).
+	 *
+	 * Note that this operation does not handle denormal inputs (where biased exponent == 0). However, it also does not
+	 * operate on denormal inputs, and does not produce denormal results.
+	 */
+	const uint32_t exp_offset = UINT32_C(0x70) << 23; /* integer: added to the bit pattern, not to a float value */
+	const float normalized_value = fp32_from_bits((two_w >> 4) + exp_offset);
+
+	/*
+	 * Convert denormalized half-precision inputs into single-precision results (always normalized).
+	 * Zero inputs are also handled here.
+	 *
+	 * In a denormalized number the biased exponent is zero, and mantissa has non-zero bits.
+	 * First, we shift mantissa into bits 0-9 of the 32-bit word.
+	 *
+	 *                  zeros           |  mantissa
+	 *      +---------------------------+------------+
+	 *      |0000 0000 0000 0000 0000 00|MM MMMM MMMM|
+	 *      +---------------------------+------------+
+	 * Bits             10-31                0-9
+	 *
+	 * Now, remember that denormalized half-precision numbers are represented as:
+	 *    FP16 = mantissa * 2**(-24).
+	 * The trick is to construct a normalized single-precision number with the same mantissa as the half-precision input
+	 * and with an exponent which would scale the corresponding mantissa bits to 2**(-24).
+	 * A normalized single-precision floating-point number is represented as:
+	 *    FP32 = (1 + mantissa * 2**(-23)) * 2**(exponent - 127)
+	 * Therefore, when the biased exponent is 126, a unit change in the mantissa of the input denormalized half-precision
+	 * number causes a change of the constructed single-precision number by 2**(-24), i.e. the same amount.
+	 *
+	 * The last step is to adjust the bias of the constructed single-precision number. When the input half-precision number
+	 * is zero, the constructed single-precision number has the value of
+	 *    FP32 = 1 * 2**(126 - 127) = 2**(-1) = 0.5
+	 * Therefore, we need to subtract 0.5 from the constructed single-precision number to get the numerical equivalent of
+	 * the input half-precision number.
+	 */
+	const uint32_t magic_mask = UINT32_C(126) << 23;
+	const float magic_bias = 0.5f;
+	const float denormalized_value = fp32_from_bits((two_w >> 17) | magic_mask) - magic_bias;
+
+	/*
+	 * - Choose either results of conversion of input as a normalized number, or as a denormalized number, depending on the
+	 *   input exponent. The variable two_w contains input exponent in bits 27-31, therefore if it is smaller than 2**27,
+	 *   the input is either a denormal number, or zero.
+	 * - Combine the result of conversion of exponent and mantissa with the sign of the input number.
+	 */
+	const uint32_t denormalized_cutoff = UINT32_C(1) << 27;
+	const uint32_t result = sign |
+		(two_w < denormalized_cutoff ? fp32_to_bits(denormalized_value) : fp32_to_bits(normalized_value));
+	return fp32_from_bits(result);
+}
+
+/*
+ * Convert a 32-bit floating-point number in IEEE single-precision format to a 16-bit floating-point number in
+ * ARM alternative half-precision format, in bit representation.
+ *
+ * @note The implementation relies on IEEE-like (no assumption about rounding mode and no operations on denormals)
+ * floating-point operations and bitcasts between integer and floating-point variables.
+ */
+static inline uint16_t fp16_alt_from_fp32_value(float f) {
+	const uint32_t w = fp32_to_bits(f);
+	const uint32_t sign = w & UINT32_C(0x80000000);
+	const uint32_t shl1_w = w + w; /* bits of f shifted left by one: the sign bit is dropped */
+
+	const uint32_t shl1_max_fp16_fp32 = UINT32_C(0x8FFFC000); /* 0x47FFE000 << 1 */
+	const uint32_t shl1_base = shl1_w > shl1_max_fp16_fp32 ? shl1_max_fp16_fp32 : shl1_w; /* clamp from above */
+	uint32_t shl1_bias = shl1_base & UINT32_C(0xFF000000); /* biased exponent of the clamped input, in bits 24-31 */
+	const uint32_t exp_difference = 23 - 10;
+	const uint32_t shl1_bias_min = (127 - 1 - exp_difference) << 24; /* biased exponent 113 (0x71), in bits 24-31 */
+	if (shl1_bias < shl1_bias_min) {
+		shl1_bias = shl1_bias_min; /* clamp the exponent from below */
+	}
+
+	const float bias = fp32_from_bits((shl1_bias >> 1) + ((exp_difference + 2) << 23)); /* power of two: exponent + 15 */
+	const float base = fp32_from_bits((shl1_base >> 1) + (2 << 23)) + bias; /* clamped |f| with exponent + 2, plus bias */
+
+	const uint32_t exp_f = fp32_to_bits(base) >> 13;
+	return (sign >> 16) | ((exp_f & UINT32_C(0x00007C00)) + (fp32_to_bits(base) & UINT32_C(0x00000FFF)));
+}
+
+#endif /* FP16_FP16_H */
diff --git a/include/fp16/psimd.h b/include/fp16/psimd.h
new file mode 100644
index 0000000..428ab06
--- /dev/null
+++ b/include/fp16/psimd.h
@@ -0,0 +1,131 @@
+#pragma once
+#ifndef FP16_PSIMD_H
+#define FP16_PSIMD_H
+
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+	#include <cstdint>
+#elif !defined(__OPENCL_VERSION__)
+	#include <stdint.h>
+#endif
+
+#include <psimd.h>
+
+
+PSIMD_INTRINSIC psimd_f32 fp16_ieee_to_fp32_psimd(psimd_u16 half) {
+	/* Converts the lanes from the low interleave of `half` (IEEE half bits); presumably each half lands in the top 16 bits of a 32-bit lane */
+	const psimd_u32 wide = (psimd_u32) psimd_interleave_lo_u16(psimd_zero_u16(), half);
+	const psimd_u32 sign_bits = wide & psimd_splat_u32(UINT32_C(0x80000000));
+	const psimd_u32 shr3_magnitude = (wide + wide) >> psimd_splat_u32(4);
+
+	/* First candidate: a float assembled from the doubled half bits and the 0x3E80 pattern, minus 0.25f */
+	const psimd_u16 denorm_pattern = psimd_splat_u16(UINT16_C(0x3E80));
+	const psimd_f32 denorm_offset = psimd_splat_f32(0.25f);
+	const psimd_f32 denorm_value = psimd_sub_f32((psimd_f32) psimd_interleave_lo_u16(half + half, denorm_pattern), denorm_offset);
+	/* Second candidate: the shifted bits plus 0x70000000, reinterpreted as float and multiplied by 2**(-112) */
+	const psimd_u32 exp_rebias = psimd_splat_u32(UINT32_C(0x70000000));
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)
+	const psimd_f32 exp_rescale = psimd_splat_f32(0x1.0p-112f);
+#else
+	const psimd_f32 exp_rescale = psimd_splat_f32(fp32_from_bits(UINT32_C(0x7800000)));
+#endif
+	const psimd_f32 norm_value = psimd_mul_f32((psimd_f32) (shr3_magnitude + exp_rebias), exp_rescale);
+	/* Blend on the signed compare (shifted bits < 0x00800000), then OR the sign bit back in */
+	const psimd_s32 take_denorm = (psimd_s32) shr3_magnitude < psimd_splat_s32(INT32_C(0x00800000));
+	return (psimd_f32) (sign_bits | (psimd_s32) psimd_blend_f32(take_denorm, denorm_value, norm_value));
+}
+
+PSIMD_INTRINSIC psimd_f32x2 fp16_ieee_to_fp32x2_psimd(psimd_u16 half) {
+	/* Converts all lanes of `half` (IEEE half-precision bits) to single precision; .lo/.hi come from the lo/hi interleaves */
+	const psimd_u32 wide_lo = (psimd_u32) psimd_interleave_lo_u16(psimd_zero_u16(), half);
+	const psimd_u32 wide_hi = (psimd_u32) psimd_interleave_hi_u16(psimd_zero_u16(), half);
+	const psimd_u32 top_bit = psimd_splat_u32(UINT32_C(0x80000000));
+	const psimd_u32 shr3_magnitude_lo = (wide_lo + wide_lo) >> psimd_splat_u32(4);
+	const psimd_u32 shr3_magnitude_hi = (wide_hi + wide_hi) >> psimd_splat_u32(4);
+
+	/* First candidate: floats assembled from the doubled half bits and the 0x3E80 pattern, minus 0.25f */
+	const psimd_u16 shl1_half = half + half;
+	const psimd_u16 denorm_pattern = psimd_splat_u16(UINT16_C(0x3E80));
+	const psimd_f32 denorm_offset = psimd_splat_f32(0.25f);
+	const psimd_f32 denorm_value_lo = psimd_sub_f32((psimd_f32) psimd_interleave_lo_u16(shl1_half, denorm_pattern), denorm_offset);
+	const psimd_f32 denorm_value_hi = psimd_sub_f32((psimd_f32) psimd_interleave_hi_u16(shl1_half, denorm_pattern), denorm_offset);
+
+	/* Second candidate: the shifted bits plus 0x70000000, reinterpreted as float and multiplied by 2**(-112) */
+	const psimd_u32 exp_rebias = psimd_splat_u32(UINT32_C(0x70000000));
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)
+	const psimd_f32 exp_rescale = psimd_splat_f32(0x1.0p-112f);
+#else
+	const psimd_f32 exp_rescale = psimd_splat_f32(fp32_from_bits(UINT32_C(0x7800000)));
+#endif
+	const psimd_f32 norm_value_lo = psimd_mul_f32((psimd_f32) (shr3_magnitude_lo + exp_rebias), exp_rescale);
+	const psimd_f32 norm_value_hi = psimd_mul_f32((psimd_f32) (shr3_magnitude_hi + exp_rebias), exp_rescale);
+
+	/* Blend on the signed compare (shifted bits < 0x00800000), then OR each lane's sign bit back in */
+	const psimd_s32 denorm_limit = psimd_splat_s32(INT32_C(0x00800000));
+	const psimd_s32 take_denorm_lo = (psimd_s32) shr3_magnitude_lo < denorm_limit;
+	const psimd_s32 take_denorm_hi = (psimd_s32) shr3_magnitude_hi < denorm_limit;
+	psimd_f32x2 result;
+	result.lo = (psimd_f32) ((wide_lo & top_bit) | (psimd_s32) psimd_blend_f32(take_denorm_lo, denorm_value_lo, norm_value_lo));
+	result.hi = (psimd_f32) ((wide_hi & top_bit) | (psimd_s32) psimd_blend_f32(take_denorm_hi, denorm_value_hi, norm_value_hi));
+	return result;
+}
+
+PSIMD_INTRINSIC psimd_f32 fp16_alt_to_fp32_psimd(psimd_u16 half) {
+	/* Converts the lanes from the low interleave of `half` (ARM alternative half-precision bits) to single precision */
+	const psimd_u32 word = (psimd_u32) psimd_interleave_lo_u16(psimd_zero_u16(), half);
+	const psimd_u32 sign = word & psimd_splat_u32(UINT32_C(0x80000000));
+	const psimd_u32 shr3_nonsign = (word + word) >> psimd_splat_u32(4);
+	/* Magnitude = 2*x - max(x, 2**-14), x being the float with bits (shr3_nonsign + 0x38000000); the disabled branch does the doubling and max on integer bit patterns */
+#if 0
+	const psimd_s32 exp112_offset = psimd_splat_s32(INT32_C(0x38000000));
+	const psimd_s32 nonsign_bits = (psimd_s32) shr3_nonsign + exp112_offset;
+	const psimd_s32 exp1_offset = psimd_splat_s32(INT32_C(0x00800000));
+	const psimd_f32 two_nonsign = (psimd_f32) (nonsign_bits + exp1_offset);
+	const psimd_s32 exp113_offset = exp112_offset | exp1_offset;
+	return (psimd_f32) (sign | (psimd_s32) psimd_sub_f32(two_nonsign, (psimd_f32) psimd_max_s32(nonsign_bits, exp113_offset)));
+#else
+	const psimd_u32 exp_offset = psimd_splat_u32(UINT32_C(0x38000000));
+	const psimd_f32 nonsign = (psimd_f32) (shr3_nonsign + exp_offset);
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)
+	const psimd_f32 denorm_bias = psimd_splat_f32(0x1.0p-14f);
+#else
+	const psimd_f32 denorm_bias = psimd_splat_f32(fp32_from_bits(UINT32_C(0x38800000)));
+#endif
+	return (psimd_f32) (sign | (psimd_s32) psimd_sub_f32(psimd_add_f32(nonsign, nonsign), psimd_max_f32(nonsign, denorm_bias)));
+#endif
+}
+
+PSIMD_INTRINSIC psimd_f32x2 fp16_alt_to_fp32x2_psimd(psimd_u16 half) {
+	/* Converts all lanes of `half` (ARM alternative half-precision bits) to single precision; .lo/.hi come from the lo/hi interleaves */
+	const psimd_u32 wide_lo = (psimd_u32) psimd_interleave_lo_u16(psimd_zero_u16(), half);
+	const psimd_u32 wide_hi = (psimd_u32) psimd_interleave_hi_u16(psimd_zero_u16(), half);
+	const psimd_u32 top_bit = psimd_splat_u32(UINT32_C(0x80000000));
+	const psimd_u32 sign_bits_lo = wide_lo & top_bit;
+	const psimd_u32 sign_bits_hi = wide_hi & top_bit;
+	const psimd_u32 shr3_magnitude_lo = (wide_lo + wide_lo) >> psimd_splat_u32(4);
+	const psimd_u32 shr3_magnitude_hi = (wide_hi + wide_hi) >> psimd_splat_u32(4);
+
+	psimd_f32x2 result;
+#if 1
+	/* Active variant: 2*x - max(x, 2**-14), with the doubling (+0x00800000) and the max done on integer bit patterns */
+	const psimd_s32 exp112_bits = psimd_splat_s32(INT32_C(0x38000000));
+	const psimd_s32 exp1_bits = psimd_splat_s32(INT32_C(0x00800000));
+	const psimd_s32 exp113_bits = exp1_bits | exp112_bits;
+	const psimd_s32 x_bits_lo = (psimd_s32) shr3_magnitude_lo + exp112_bits;
+	const psimd_s32 x_bits_hi = (psimd_s32) shr3_magnitude_hi + exp112_bits;
+	const psimd_f32 twice_x_lo = (psimd_f32) (x_bits_lo + exp1_bits);
+	const psimd_f32 twice_x_hi = (psimd_f32) (x_bits_hi + exp1_bits);
+	result.lo = (psimd_f32) (sign_bits_lo | (psimd_s32) psimd_sub_f32(twice_x_lo, (psimd_f32) psimd_max_s32(x_bits_lo, exp113_bits)));
+	result.hi = (psimd_f32) (sign_bits_hi | (psimd_s32) psimd_sub_f32(twice_x_hi, (psimd_f32) psimd_max_s32(x_bits_hi, exp113_bits)));
+#else
+	/* Disabled variant: the same formula using a floating-point add and a floating-point max */
+	const psimd_u32 exp112_bits = psimd_splat_u32(UINT32_C(0x38000000));
+	const psimd_f32 x_lo = (psimd_f32) (shr3_magnitude_lo + exp112_bits);
+	const psimd_f32 x_hi = (psimd_f32) (shr3_magnitude_hi + exp112_bits);
+	const psimd_f32 min_normal = psimd_splat_f32(0x1.0p-14f);
+	result.lo = (psimd_f32) (sign_bits_lo | (psimd_s32) psimd_sub_f32(psimd_add_f32(x_lo, x_lo), psimd_max_f32(x_lo, min_normal)));
+	result.hi = (psimd_f32) (sign_bits_hi | (psimd_s32) psimd_sub_f32(psimd_add_f32(x_hi, x_hi), psimd_max_f32(x_hi, min_normal)));
+#endif
+	return result;
+}
+
+#endif /* FP16_PSIMD_H */
diff --git a/test/alt-from-fp32-value.cc b/test/alt-from-fp32-value.cc
new file mode 100644
index 0000000..b8272f6
--- /dev/null
+++ b/test/alt-from-fp32-value.cc
@@ -0,0 +1,498 @@
+#include <gtest/gtest.h>
+
+#include <cstdint>
+
+#include <fp16.h>
+#include "tables.h"
+
+#if (defined(__i386__) || defined(__x86_64__)) && defined(__F16C__)
+	#include <x86intrin.h>
+#endif
+
+
+TEST(FP16_ALT_FROM_FP32_VALUE, normalized_powers_of_2) {
+	const uint16_t min_po2_f16   = UINT16_C(0x0400);
+	const uint16_t eighths_f16   = UINT16_C(0x3000);
+	const uint16_t quarter_f16   = UINT16_C(0x3400);
+	const uint16_t half_f16      = UINT16_C(0x3800);
+	const uint16_t one_f16       = UINT16_C(0x3C00);
+	const uint16_t two_f16       = UINT16_C(0x4000);
+	const uint16_t four_f16      = UINT16_C(0x4400);
+	const uint16_t eight_f16     = UINT16_C(0x4800);
+	const uint16_t sixteen_f16   = UINT16_C(0x4C00);
+	const uint16_t thirtytwo_f16 = UINT16_C(0x5000);
+	const uint16_t sixtyfour_f16 = UINT16_C(0x5400);
+	const uint16_t max_po2_f16   = UINT16_C(0x7C00);
+	/* Single-precision bit patterns of the same powers of two, in the same order as the f16 patterns above */
+	const uint32_t min_po2_f32   = UINT32_C(0x38800000);
+	const uint32_t eighths_f32   = UINT32_C(0x3E000000);
+	const uint32_t quarter_f32   = UINT32_C(0x3E800000);
+	const uint32_t half_f32      = UINT32_C(0x3F000000);
+	const uint32_t one_f32       = UINT32_C(0x3F800000);
+	const uint32_t two_f32       = UINT32_C(0x40000000);
+	const uint32_t four_f32      = UINT32_C(0x40800000);
+	const uint32_t eight_f32     = UINT32_C(0x41000000);
+	const uint32_t sixteen_f32   = UINT32_C(0x41800000);
+	const uint32_t thirtytwo_f32 = UINT32_C(0x42000000);
+	const uint32_t sixtyfour_f32 = UINT32_C(0x42800000);
+	const uint32_t max_po2_f32   = UINT32_C(0x47800000);
+	/* Every check reinterprets an f32 pattern as float and expects fp16_alt_from_fp32_value to yield the paired f16 pattern */
+	float min_po2_value;
+	memcpy(&min_po2_value, &min_po2_f32, sizeof(min_po2_value));
+	EXPECT_EQ(min_po2_f16, fp16_alt_from_fp32_value(min_po2_value)) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F32 = 0x" << std::setw(8) << min_po2_f32 << ", " <<
+		"F16(F32) = 0x" << std::setw(4) << fp16_alt_from_fp32_value(min_po2_value) << ", " <<
+		"F16 = 0x" << std::setw(4) << min_po2_f16;
+
+	float eighths_value;
+	memcpy(&eighths_value, &eighths_f32, sizeof(eighths_value));
+	EXPECT_EQ(eighths_f16, fp16_alt_from_fp32_value(eighths_value)) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F32 = 0x" << std::setw(8) << eighths_f32 << ", " <<
+		"F16(F32) = 0x" << std::setw(4) << fp16_alt_from_fp32_value(eighths_value) << ", " <<
+		"F16 = 0x" << std::setw(4) << eighths_f16;
+
+	float quarter_value;
+	memcpy(&quarter_value, &quarter_f32, sizeof(quarter_value));
+	EXPECT_EQ(quarter_f16, fp16_alt_from_fp32_value(quarter_value)) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F32 = 0x" << std::setw(8) << quarter_f32 << ", " <<
+		"F16(F32) = 0x" << std::setw(4) << fp16_alt_from_fp32_value(quarter_value) << ", " <<
+		"F16 = 0x" << std::setw(4) << quarter_f16;
+
+	float half_value;
+	memcpy(&half_value, &half_f32, sizeof(half_value));
+	EXPECT_EQ(half_f16, fp16_alt_from_fp32_value(half_value)) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F32 = 0x" << std::setw(8) << half_f32 << ", " <<
+		"F16(F32) = 0x" << std::setw(4) << fp16_alt_from_fp32_value(half_value) << ", " <<
+		"F16 = 0x" << std::setw(4) << half_f16;
+
+	float one_value;
+	memcpy(&one_value, &one_f32, sizeof(one_value));
+	EXPECT_EQ(one_f16, fp16_alt_from_fp32_value(one_value)) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F32 = 0x" << std::setw(8) << one_f32 << ", " <<
+		"F16(F32) = 0x" << std::setw(4) << fp16_alt_from_fp32_value(one_value) << ", " <<
+		"F16 = 0x" << std::setw(4) << one_f16;
+
+	float two_value;
+	memcpy(&two_value, &two_f32, sizeof(two_value));
+	EXPECT_EQ(two_f16, fp16_alt_from_fp32_value(two_value)) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F32 = 0x" << std::setw(8) << two_f32 << ", " <<
+		"F16(F32) = 0x" << std::setw(4) << fp16_alt_from_fp32_value(two_value) << ", " <<
+		"F16 = 0x" << std::setw(4) << two_f16;
+
+	float four_value;
+	memcpy(&four_value, &four_f32, sizeof(four_value));
+	EXPECT_EQ(four_f16, fp16_alt_from_fp32_value(four_value)) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F32 = 0x" << std::setw(8) << four_f32 << ", " <<
+		"F16(F32) = 0x" << std::setw(4) << fp16_alt_from_fp32_value(four_value) << ", " <<
+		"F16 = 0x" << std::setw(4) << four_f16;
+
+	float eight_value;
+	memcpy(&eight_value, &eight_f32, sizeof(eight_value));
+	EXPECT_EQ(eight_f16, fp16_alt_from_fp32_value(eight_value)) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F32 = 0x" << std::setw(8) << eight_f32 << ", " <<
+		"F16(F32) = 0x" << std::setw(4) << fp16_alt_from_fp32_value(eight_value) << ", " <<
+		"F16 = 0x" << std::setw(4) << eight_f16;
+
+	float sixteen_value;
+	memcpy(&sixteen_value, &sixteen_f32, sizeof(sixteen_value));
+	EXPECT_EQ(sixteen_f16, fp16_alt_from_fp32_value(sixteen_value)) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F32 = 0x" << std::setw(8) << sixteen_f32 << ", " <<
+		"F16(F32) = 0x" << std::setw(4) << fp16_alt_from_fp32_value(sixteen_value) << ", " <<
+		"F16 = 0x" << std::setw(4) << sixteen_f16;
+
+	float thirtytwo_value;
+	memcpy(&thirtytwo_value, &thirtytwo_f32, sizeof(thirtytwo_value));
+	EXPECT_EQ(thirtytwo_f16, fp16_alt_from_fp32_value(thirtytwo_value)) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F32 = 0x" << std::setw(8) << thirtytwo_f32 << ", " <<
+		"F16(F32) = 0x" << std::setw(4) << fp16_alt_from_fp32_value(thirtytwo_value) << ", " <<
+		"F16 = 0x" << std::setw(4) << thirtytwo_f16;
+
+	float sixtyfour_value;
+	memcpy(&sixtyfour_value, &sixtyfour_f32, sizeof(sixtyfour_value));
+	EXPECT_EQ(sixtyfour_f16, fp16_alt_from_fp32_value(sixtyfour_value)) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F32 = 0x" << std::setw(8) << sixtyfour_f32 << ", " <<
+		"F16(F32) = 0x" << std::setw(4) << fp16_alt_from_fp32_value(sixtyfour_value) << ", " <<
+		"F16 = 0x" << std::setw(4) << sixtyfour_f16;
+	/* 2**16 (f32 0x47800000) is expected to map to 0x7C00; this check must use the alternative-format conversion like the others */
+	float max_po2_value;
+	memcpy(&max_po2_value, &max_po2_f32, sizeof(max_po2_value));
+	EXPECT_EQ(max_po2_f16, fp16_alt_from_fp32_value(max_po2_value)) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F32 = 0x" << std::setw(8) << max_po2_f32 << ", " <<
+		"F16(F32) = 0x" << std::setw(4) << fp16_alt_from_fp32_value(max_po2_value) << ", " <<
+		"F16 = 0x" << std::setw(4) << max_po2_f16;
+}
+
+TEST(FP16_ALT_FROM_FP32_VALUE, denormalized_powers_of_2) {
+	/* Inputs 2**-15 .. 2**-25 as f32 bit patterns, followed by the f16 pattern expected for each; 2**-25 is expected to become zero */
+	const uint32_t exp2_minus_15_f32 = UINT32_C(0x38000000);
+	const uint32_t exp2_minus_16_f32 = UINT32_C(0x37800000);
+	const uint32_t exp2_minus_17_f32 = UINT32_C(0x37000000);
+	const uint32_t exp2_minus_18_f32 = UINT32_C(0x36800000);
+	const uint32_t exp2_minus_19_f32 = UINT32_C(0x36000000);
+	const uint32_t exp2_minus_20_f32 = UINT32_C(0x35800000);
+	const uint32_t exp2_minus_21_f32 = UINT32_C(0x35000000);
+	const uint32_t exp2_minus_22_f32 = UINT32_C(0x34800000);
+	const uint32_t exp2_minus_23_f32 = UINT32_C(0x34000000);
+	const uint32_t exp2_minus_24_f32 = UINT32_C(0x33800000);
+	const uint32_t exp2_minus_25_f32 = UINT32_C(0x33000000);
+	const uint16_t exp2_minus_15_f16 = UINT16_C(0x0200);
+	const uint16_t exp2_minus_16_f16 = UINT16_C(0x0100);
+	const uint16_t exp2_minus_17_f16 = UINT16_C(0x0080);
+	const uint16_t exp2_minus_18_f16 = UINT16_C(0x0040);
+	const uint16_t exp2_minus_19_f16 = UINT16_C(0x0020);
+	const uint16_t exp2_minus_20_f16 = UINT16_C(0x0010);
+	const uint16_t exp2_minus_21_f16 = UINT16_C(0x0008);
+	const uint16_t exp2_minus_22_f16 = UINT16_C(0x0004);
+	const uint16_t exp2_minus_23_f16 = UINT16_C(0x0002);
+	const uint16_t exp2_minus_24_f16 = UINT16_C(0x0001);
+	const uint16_t exp2_minus_25_f16 = UINT16_C(0x0000);
+	{
+		float value;
+		memcpy(&value, &exp2_minus_15_f32, sizeof(value));
+		const uint16_t converted = fp16_alt_from_fp32_value(value);
+		EXPECT_EQ(exp2_minus_15_f16, converted) << std::hex << std::uppercase << std::setfill('0') <<
+			"F32 = 0x" << std::setw(8) << exp2_minus_15_f32 << ", " <<
+			"F16(F32) = 0x" << std::setw(4) << converted << ", " << "F16 = 0x" << std::setw(4) << exp2_minus_15_f16;
+	}
+	{
+		float value;
+		memcpy(&value, &exp2_minus_16_f32, sizeof(value));
+		const uint16_t converted = fp16_alt_from_fp32_value(value);
+		EXPECT_EQ(exp2_minus_16_f16, converted) << std::hex << std::uppercase << std::setfill('0') <<
+			"F32 = 0x" << std::setw(8) << exp2_minus_16_f32 << ", " <<
+			"F16(F32) = 0x" << std::setw(4) << converted << ", " << "F16 = 0x" << std::setw(4) << exp2_minus_16_f16;
+	}
+	{
+		float value;
+		memcpy(&value, &exp2_minus_17_f32, sizeof(value));
+		const uint16_t converted = fp16_alt_from_fp32_value(value);
+		EXPECT_EQ(exp2_minus_17_f16, converted) << std::hex << std::uppercase << std::setfill('0') <<
+			"F32 = 0x" << std::setw(8) << exp2_minus_17_f32 << ", " <<
+			"F16(F32) = 0x" << std::setw(4) << converted << ", " << "F16 = 0x" << std::setw(4) << exp2_minus_17_f16;
+	}
+	{
+		float value;
+		memcpy(&value, &exp2_minus_18_f32, sizeof(value));
+		const uint16_t converted = fp16_alt_from_fp32_value(value);
+		EXPECT_EQ(exp2_minus_18_f16, converted) << std::hex << std::uppercase << std::setfill('0') <<
+			"F32 = 0x" << std::setw(8) << exp2_minus_18_f32 << ", " <<
+			"F16(F32) = 0x" << std::setw(4) << converted << ", " << "F16 = 0x" << std::setw(4) << exp2_minus_18_f16;
+	}
+	{
+		float value;
+		memcpy(&value, &exp2_minus_19_f32, sizeof(value));
+		const uint16_t converted = fp16_alt_from_fp32_value(value);
+		EXPECT_EQ(exp2_minus_19_f16, converted) << std::hex << std::uppercase << std::setfill('0') <<
+			"F32 = 0x" << std::setw(8) << exp2_minus_19_f32 << ", " <<
+			"F16(F32) = 0x" << std::setw(4) << converted << ", " << "F16 = 0x" << std::setw(4) << exp2_minus_19_f16;
+	}
+	{
+		float value;
+		memcpy(&value, &exp2_minus_20_f32, sizeof(value));
+		const uint16_t converted = fp16_alt_from_fp32_value(value);
+		EXPECT_EQ(exp2_minus_20_f16, converted) << std::hex << std::uppercase << std::setfill('0') <<
+			"F32 = 0x" << std::setw(8) << exp2_minus_20_f32 << ", " <<
+			"F16(F32) = 0x" << std::setw(4) << converted << ", " << "F16 = 0x" << std::setw(4) << exp2_minus_20_f16;
+	}
+	{
+		float value;
+		memcpy(&value, &exp2_minus_21_f32, sizeof(value));
+		const uint16_t converted = fp16_alt_from_fp32_value(value);
+		EXPECT_EQ(exp2_minus_21_f16, converted) << std::hex << std::uppercase << std::setfill('0') <<
+			"F32 = 0x" << std::setw(8) << exp2_minus_21_f32 << ", " <<
+			"F16(F32) = 0x" << std::setw(4) << converted << ", " << "F16 = 0x" << std::setw(4) << exp2_minus_21_f16;
+	}
+	{
+		float value;
+		memcpy(&value, &exp2_minus_22_f32, sizeof(value));
+		const uint16_t converted = fp16_alt_from_fp32_value(value);
+		EXPECT_EQ(exp2_minus_22_f16, converted) << std::hex << std::uppercase << std::setfill('0') <<
+			"F32 = 0x" << std::setw(8) << exp2_minus_22_f32 << ", " <<
+			"F16(F32) = 0x" << std::setw(4) << converted << ", " << "F16 = 0x" << std::setw(4) << exp2_minus_22_f16;
+	}
+	{
+		float value;
+		memcpy(&value, &exp2_minus_23_f32, sizeof(value));
+		const uint16_t converted = fp16_alt_from_fp32_value(value);
+		EXPECT_EQ(exp2_minus_23_f16, converted) << std::hex << std::uppercase << std::setfill('0') <<
+			"F32 = 0x" << std::setw(8) << exp2_minus_23_f32 << ", " <<
+			"F16(F32) = 0x" << std::setw(4) << converted << ", " << "F16 = 0x" << std::setw(4) << exp2_minus_23_f16;
+	}
+	{
+		float value;
+		memcpy(&value, &exp2_minus_24_f32, sizeof(value));
+		const uint16_t converted = fp16_alt_from_fp32_value(value);
+		EXPECT_EQ(exp2_minus_24_f16, converted) << std::hex << std::uppercase << std::setfill('0') <<
+			"F32 = 0x" << std::setw(8) << exp2_minus_24_f32 << ", " <<
+			"F16(F32) = 0x" << std::setw(4) << converted << ", " << "F16 = 0x" << std::setw(4) << exp2_minus_24_f16;
+	}
+	{
+		float value;
+		memcpy(&value, &exp2_minus_25_f32, sizeof(value));
+		const uint16_t converted = fp16_alt_from_fp32_value(value);
+		EXPECT_EQ(exp2_minus_25_f16, converted) << std::hex << std::uppercase << std::setfill('0') <<
+			"F32 = 0x" << std::setw(8) << exp2_minus_25_f32 << ", " <<
+			"F16(F32) = 0x" << std::setw(4) << converted << ", " << "F16 = 0x" << std::setw(4) << exp2_minus_25_f16;
+	}
+}
+
+TEST(FP16_ALT_FROM_FP32_VALUE, zero) {
+	/* Both signed zeros must convert to the zero of the same sign */
+	const uint32_t positive_zero_f32 = UINT32_C(0x00000000);
+	const uint32_t negative_zero_f32 = UINT32_C(0x80000000);
+	const uint16_t positive_zero_f16 = UINT16_C(0x0000);
+	const uint16_t negative_zero_f16 = UINT16_C(0x8000);
+	{
+		float value;
+		memcpy(&value, &positive_zero_f32, sizeof(value));
+		const uint16_t converted = fp16_alt_from_fp32_value(value);
+		EXPECT_EQ(positive_zero_f16, converted) << std::hex << std::uppercase << std::setfill('0') <<
+			"F32 = 0x" << std::setw(8) << positive_zero_f32 << ", " <<
+			"F16(F32) = 0x" << std::setw(4) << converted << ", " << "F16 = 0x" << std::setw(4) << positive_zero_f16;
+	}
+	{
+		float value;
+		memcpy(&value, &negative_zero_f32, sizeof(value));
+		const uint16_t converted = fp16_alt_from_fp32_value(value);
+		EXPECT_EQ(negative_zero_f16, converted) << std::hex << std::uppercase << std::setfill('0') <<
+			"F32 = 0x" << std::setw(8) << negative_zero_f32 << ", " <<
+			"F16(F32) = 0x" << std::setw(4) << converted << ", " << "F16 = 0x" << std::setw(4) << negative_zero_f16;
+	}
+}
+
+TEST(FP16_ALT_FROM_FP32_VALUE, infinity) {
+	/* Both infinities are expected to saturate to the all-ones magnitude: 0x7FFF for +inf and 0xFFFF for -inf */
+	const uint32_t positive_infinity_f32 = UINT32_C(0x7F800000);
+	const uint32_t negative_infinity_f32 = UINT32_C(0xFF800000);
+	const uint16_t max_f16 = UINT16_C(0x7FFF);
+	const uint16_t min_f16 = UINT16_C(0xFFFF);
+	{
+		float value;
+		memcpy(&value, &positive_infinity_f32, sizeof(value));
+		const uint16_t converted = fp16_alt_from_fp32_value(value);
+		EXPECT_EQ(max_f16, converted) << std::hex << std::uppercase << std::setfill('0') <<
+			"F32 = 0x" << std::setw(8) << positive_infinity_f32 << ", " <<
+			"F16(F32) = 0x" << std::setw(4) << converted << ", " << "F16 = 0x" << std::setw(4) << max_f16;
+	}
+	{
+		float value;
+		memcpy(&value, &negative_infinity_f32, sizeof(value));
+		const uint16_t converted = fp16_alt_from_fp32_value(value);
+		EXPECT_EQ(min_f16, converted) << std::hex << std::uppercase << std::setfill('0') <<
+			"F32 = 0x" << std::setw(8) << negative_infinity_f32 << ", " <<
+			"F16(F32) = 0x" << std::setw(4) << converted << ", " << "F16 = 0x" << std::setw(4) << min_f16;
+	}
+}
+
+TEST(FP16_ALT_FROM_FP32_VALUE, positive_nan) {
+	/* Visits every f32 pattern from 0x7FFFFFFF down to 0x7F800001 (the positive NaNs) and inspects the converted fields */
+	uint32_t nan_f32 = UINT32_C(0x7FFFFFFF);
+	while (nan_f32 > UINT32_C(0x7F800000)) {
+		float nan_value;
+		memcpy(&nan_value, &nan_f32, sizeof(float));
+		const uint16_t nan_f16 = fp16_alt_from_fp32_value(nan_value);
+
+		/* The sign bit must stay clear */
+		ASSERT_EQ(nan_f16 & UINT16_C(0x8000), 0) << std::hex << std::uppercase << std::setfill('0') <<
+			"F32 = 0x" << std::setw(8) << nan_f32 << ", " <<
+			"F16(F32) = 0x" << std::setw(4) << nan_f16;
+
+		/* The exponent field must be all ones */
+		ASSERT_EQ(nan_f16 & UINT16_C(0x7C00), UINT16_C(0x7C00)) << std::hex << std::uppercase << std::setfill('0') <<
+			"F32 = 0x" << std::setw(8) << nan_f32 << ", " <<
+			"F16(F32) = 0x" << std::setw(4) << nan_f16;
+
+		/* The mantissa field must be non-zero */
+		ASSERT_NE(nan_f16 & UINT16_C(0x03FF), 0) << std::hex << std::uppercase << std::setfill('0') <<
+			"F32 = 0x" << std::setw(8) << nan_f32 << ", " <<
+			"F16(F32) = 0x" << std::setw(4) << nan_f16;
+		nan_f32--;
+	}
+}
+
+TEST(FP16_ALT_FROM_FP32_VALUE, negative_nan) {
+	/* Visits every f32 pattern from 0xFFFFFFFF down to 0xFF800001 (the negative NaNs) and inspects the converted fields */
+	uint32_t nan_f32 = UINT32_C(0xFFFFFFFF);
+	while (nan_f32 > UINT32_C(0xFF800000)) {
+		float nan_value;
+		memcpy(&nan_value, &nan_f32, sizeof(float));
+		const uint16_t nan_f16 = fp16_alt_from_fp32_value(nan_value);
+
+		/* The sign bit must stay set */
+		ASSERT_EQ(nan_f16 & UINT16_C(0x8000), UINT16_C(0x8000)) << std::hex << std::uppercase << std::setfill('0') <<
+			"F32 = 0x" << std::setw(8) << nan_f32 << ", " <<
+			"F16(F32) = 0x" << std::setw(4) << nan_f16;
+
+		/* The exponent field must be all ones */
+		ASSERT_EQ(nan_f16 & UINT16_C(0x7C00), UINT16_C(0x7C00)) << std::hex << std::uppercase << std::setfill('0') <<
+			"F32 = 0x" << std::setw(8) << nan_f32 << ", " <<
+			"F16(F32) = 0x" << std::setw(4) << nan_f16;
+
+		/* The mantissa field must be non-zero */
+		ASSERT_NE(nan_f16 & UINT16_C(0x03FF), 0) << std::hex << std::uppercase << std::setfill('0') <<
+			"F32 = 0x" << std::setw(8) << nan_f32 << ", " <<
+			"F16(F32) = 0x" << std::setw(4) << nan_f16;
+		nan_f32--;
+	}
+}
+
+TEST(FP16_ALT_FROM_FP32_VALUE, revertible) {
+	/* Codes with the sign bit clear, ascending from 0x0000 through 0x7FFF: widening then narrowing must give the code back */
+	for (uint16_t half = UINT16_C(0x0000); half <= UINT16_C(0x7FFF); half++) {
+		const float widened = fp16_alt_to_fp32_value(half);
+		const uint16_t narrowed = fp16_alt_from_fp32_value(widened);
+		uint32_t widened_bits;
+		memcpy(&widened_bits, &widened, sizeof(uint32_t));
+		ASSERT_EQ(half, narrowed) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << half << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << widened_bits << ", " <<
+			"F16(F32(F16)) = 0x" << std::setw(4) << narrowed;
+	}
+
+	/* Codes with the sign bit set, descending from 0xFFFF through 0x8000: same round-trip requirement */
+	for (uint16_t half = UINT16_C(0xFFFF); half >= UINT16_C(0x8000); half--) {
+		const float widened = fp16_alt_to_fp32_value(half);
+		const uint16_t narrowed = fp16_alt_from_fp32_value(widened);
+		uint32_t widened_bits;
+		memcpy(&widened_bits, &widened, sizeof(uint32_t));
+		ASSERT_EQ(half, narrowed) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << half << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << widened_bits << ", " <<
+			"F16(F32(F16)) = 0x" << std::setw(4) << narrowed;
+	}
+}
+
+TEST(FP16_ALT_FROM_FP32_VALUE, underflow) {
+	/* Every positive f32 pattern below 0x33000001 must convert to +0; 0x33000001 itself must convert to 0x0001 */
+	const uint16_t zero_f16 = UINT16_C(0x0000);
+	const uint16_t min_f16 = UINT16_C(0x0001);
+	const uint32_t min_nonzero_f32 = UINT32_C(0x33000001);
+	for (uint32_t pattern = UINT32_C(0x00000001); pattern < min_nonzero_f32; pattern++) {
+		float value;
+		memcpy(&value, &pattern, sizeof(float));
+		const uint16_t converted = fp16_alt_from_fp32_value(value);
+		ASSERT_EQ(zero_f16, converted) << std::hex << std::uppercase << std::setfill('0') <<
+			"F32 = 0x" << std::setw(8) << pattern << ", " <<
+			"F16(F32) = 0x" << std::setw(4) << converted << ", " << "F16 = 0x" << std::setw(4) << zero_f16;
+	}
+
+	float min_nonzero_value;
+	memcpy(&min_nonzero_value, &min_nonzero_f32, sizeof(float));
+	const uint16_t min_nonzero_converted = fp16_alt_from_fp32_value(min_nonzero_value);
+	ASSERT_EQ(min_f16, min_nonzero_converted) << std::hex << std::uppercase << std::setfill('0') <<
+		"F32 = 0x" << std::setw(8) << min_nonzero_f32 << ", " <<
+		"F16(F32) = 0x" << std::setw(4) << min_nonzero_converted << ", " << "F16 = 0x" << std::setw(4) << min_f16;
+}
+
+TEST(FP16_ALT_FROM_FP32_VALUE, saturation) {
+	/* Every f32 pattern from +infinity (0x7F800000) down to just above 0x47FFE000 must convert to 0x7FFF */
+	const uint16_t max_f16 = UINT16_C(0x7FFF);
+	const uint32_t max_f16_f32 = UINT32_C(0x47FFE000);
+	const uint32_t positive_infinity_f32 = UINT32_C(0x7F800000);
+	for (uint32_t pattern = positive_infinity_f32; pattern > max_f16_f32; pattern--) {
+		float value;
+		memcpy(&value, &pattern, sizeof(float));
+		const uint16_t converted = fp16_alt_from_fp32_value(value);
+		ASSERT_EQ(max_f16, converted) << std::hex << std::uppercase << std::setfill('0') <<
+			"F32 = 0x" << std::setw(8) << pattern << ", " <<
+			"F16(F32) = 0x" << std::setw(4) << converted << ", " << "F16 = 0x" << std::setw(4) << max_f16;
+	}
+}
+
+TEST(FP16_ALT_FROM_FP32_VALUE, positive_denormalized_values) {
+	/* For each f16 code below 0x0400, every f32 pattern in [lower, fp16::denormalizedRanges[code]) must convert to that code */
+	const uint32_t min_nonzero_f32 = UINT32_C(0x33000001);
+
+	uint32_t lower = min_nonzero_f32;
+	for (uint16_t code = 0; code < UINT16_C(0x0400); code++) {
+		const uint32_t upper = fp16::denormalizedRanges[code];
+		for (uint32_t pattern = lower; pattern < upper; pattern++) {
+			float value;
+			memcpy(&value, &pattern, sizeof(float));
+			const uint16_t converted = fp16_alt_from_fp32_value(value);
+			ASSERT_EQ(code, converted) << std::hex << std::uppercase << std::setfill('0') <<
+				"F32 = 0x" << std::setw(8) << pattern << ", " <<
+				"F16(F32) = 0x" << std::setw(4) << converted << ", " << "F16 = 0x" << std::setw(4) << code;
+		}
+		lower = upper;
+	}
+}
+
+TEST(FP16_ALT_FROM_FP32_VALUE, negative_denormalized_values) {
+	/* Sign-bit-set counterpart: codes 0x8000..0x83FF against fp16::denormalizedRanges entries with the f32 sign bit OR-ed in */
+	const uint32_t min_nonzero_f32 = UINT32_C(0x33000001);
+
+	uint32_t lower = min_nonzero_f32 | UINT32_C(0x80000000);
+	for (uint16_t code = UINT16_C(0x8000); code < UINT16_C(0x8400); code++) {
+		const uint32_t upper = fp16::denormalizedRanges[code & UINT16_C(0x7FFF)] | UINT32_C(0x80000000);
+		for (uint32_t pattern = lower; pattern < upper; pattern++) {
+			float value;
+			memcpy(&value, &pattern, sizeof(float));
+			const uint16_t converted = fp16_alt_from_fp32_value(value);
+			ASSERT_EQ(code, converted) << std::hex << std::uppercase << std::setfill('0') <<
+				"F32 = 0x" << std::setw(8) << pattern << ", " <<
+				"F16(F32) = 0x" << std::setw(4) << converted << ", " << "F16 = 0x" << std::setw(4) << code;
+		}
+		lower = upper;
+	}
+}
+
+TEST(FP16_ALT_FROM_FP32_VALUE, positive_normalized_values) {
+	/* 0x3F7FF000 is the minimum f32 pattern that rounds to 1.0h; moving it by e exponent steps seeds the sweep for exponent e */
+	const uint32_t min_one_f32 = UINT32_C(0x3F7FF000);
+	const uint32_t e_bias = 15;
+	for (int32_t e = -14; e <= 16; e++) {
+		const uint32_t exponent_shift = uint32_t(e) << 23;
+		const int32_t code_stop = uint16_t(e + e_bias + 1) << 10;
+		uint32_t lower = min_one_f32 + exponent_shift;
+		for (uint16_t code = uint16_t(e + e_bias) << 10; code < code_stop; code++) {
+			const uint32_t upper = fp16::normalizedRanges[code & UINT16_C(0x3FF)] + exponent_shift;
+			for (uint32_t pattern = lower; pattern < upper; pattern++) {
+				float value;
+				memcpy(&value, &pattern, sizeof(float));
+				const uint16_t converted = fp16_alt_from_fp32_value(value);
+				ASSERT_EQ(code, converted) << std::hex << std::uppercase << std::setfill('0') <<
+					"F32 = 0x" << std::setw(8) << pattern << ", " <<
+					"F16(F32) = 0x" << std::setw(4) << converted << ", " << "F16 = 0x" << std::setw(4) << code;
+			}
+			lower = upper;
+		}
+	}
+}
+
+TEST(FP16_ALT_FROM_FP32_VALUE, negative_normalized_values) {
+	/* Sweeps the f32 patterns mapping to each negative normalized f16 code (e = -14..16), using fp16::normalizedRanges entries as exclusive upper bounds */
+	const uint32_t min_one_f32 = UINT32_C(0x3F7FF000); /* Minimum number that rounds to 1.0h when converted to half-precision */
+	const uint32_t e_bias = 15;
+	/* The f16 counter is 32 bits wide: for e == 16 the exclusive end is 0x10000, which a 16-bit bound cannot express (it collapsed to 0x8000 and skipped the exponent) */
+	for (int32_t e = -14; e <= 16; e++) {
+		uint32_t f32_begin = (min_one_f32 | UINT32_C(0x80000000)) + (uint32_t(e) << 23);
+		const uint32_t f16_begin = UINT32_C(0x8000) | ((uint32_t(e) + e_bias) << 10);
+		for (uint32_t f16_bits = f16_begin; f16_bits < f16_begin + UINT32_C(0x0400); f16_bits++) {
+			const uint16_t f16 = uint16_t(f16_bits);
+			const uint32_t f32_end = (fp16::normalizedRanges[f16 & UINT16_C(0x3FF)] | UINT32_C(0x80000000)) + (uint32_t(e) << 23);
+			for (uint32_t f32 = f32_begin; f32 < f32_end; f32++) {
+				float value;
+				memcpy(&value, &f32, sizeof(value));
+				ASSERT_EQ(f16, fp16_alt_from_fp32_value(value)) << std::hex << std::uppercase << std::setfill('0') <<
+					"F32 = 0x" << std::setw(8) << f32 << ", " <<
+					"F16(F32) = 0x" << std::setw(4) << fp16_alt_from_fp32_value(value) << ", " << "F16 = 0x" << std::setw(4) << f16;
+			}
+			f32_begin = f32_end;
+		}
+	}
+}
diff --git a/test/alt-to-fp32-bits.cc b/test/alt-to-fp32-bits.cc
new file mode 100644
index 0000000..09bdd95
--- /dev/null
+++ b/test/alt-to-fp32-bits.cc
@@ -0,0 +1,263 @@
+#include <gtest/gtest.h>
+
+#include <cstdint>
+
+#include <fp16.h>
+#include "tables.h"
+
+
+TEST(FP16_ALT_TO_FP32_BITS, normalized_powers_of_2) {
+	/* Each case pairs a half-precision bit pattern with the single-precision bit pattern of the same power of two. */
+	/* The last case expects 0x7C00 (exponent field 31) to convert to 2**16, i.e. a finite value rather than infinity. */
+	const uint16_t min_po2_f16 = UINT16_C(0x0400);
+	const uint32_t min_po2_f32 = UINT32_C(0x38800000);
+	EXPECT_EQ(min_po2_f32, fp16_alt_to_fp32_bits(min_po2_f16))
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F16 = 0x" << std::setw(4) << min_po2_f16 << ", "
+		<< "F32(F16) = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(min_po2_f16) << ", "
+		<< "F32 = 0x" << std::setw(8) << min_po2_f32;
+
+	const uint16_t eighths_f16 = UINT16_C(0x3000);
+	const uint32_t eighths_f32 = UINT32_C(0x3E000000);
+	EXPECT_EQ(eighths_f32, fp16_alt_to_fp32_bits(eighths_f16))
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F16 = 0x" << std::setw(4) << eighths_f16 << ", "
+		<< "F32(F16) = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(eighths_f16) << ", "
+		<< "F32 = 0x" << std::setw(8) << eighths_f32;
+
+	const uint16_t quarter_f16 = UINT16_C(0x3400);
+	const uint32_t quarter_f32 = UINT32_C(0x3E800000);
+	EXPECT_EQ(quarter_f32, fp16_alt_to_fp32_bits(quarter_f16))
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F16 = 0x" << std::setw(4) << quarter_f16 << ", "
+		<< "F32(F16) = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(quarter_f16) << ", "
+		<< "F32 = 0x" << std::setw(8) << quarter_f32;
+
+	const uint16_t half_f16 = UINT16_C(0x3800);
+	const uint32_t half_f32 = UINT32_C(0x3F000000);
+	EXPECT_EQ(half_f32, fp16_alt_to_fp32_bits(half_f16))
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F16 = 0x" << std::setw(4) << half_f16 << ", "
+		<< "F32(F16) = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(half_f16) << ", "
+		<< "F32 = 0x" << std::setw(8) << half_f32;
+
+	const uint16_t one_f16 = UINT16_C(0x3C00);
+	const uint32_t one_f32 = UINT32_C(0x3F800000);
+	EXPECT_EQ(one_f32, fp16_alt_to_fp32_bits(one_f16))
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F16 = 0x" << std::setw(4) << one_f16 << ", "
+		<< "F32(F16) = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(one_f16) << ", "
+		<< "F32 = 0x" << std::setw(8) << one_f32;
+
+	const uint16_t two_f16 = UINT16_C(0x4000);
+	const uint32_t two_f32 = UINT32_C(0x40000000);
+	EXPECT_EQ(two_f32, fp16_alt_to_fp32_bits(two_f16))
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F16 = 0x" << std::setw(4) << two_f16 << ", "
+		<< "F32(F16) = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(two_f16) << ", "
+		<< "F32 = 0x" << std::setw(8) << two_f32;
+
+	const uint16_t four_f16 = UINT16_C(0x4400);
+	const uint32_t four_f32 = UINT32_C(0x40800000);
+	EXPECT_EQ(four_f32, fp16_alt_to_fp32_bits(four_f16))
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F16 = 0x" << std::setw(4) << four_f16 << ", "
+		<< "F32(F16) = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(four_f16) << ", "
+		<< "F32 = 0x" << std::setw(8) << four_f32;
+
+	const uint16_t eight_f16 = UINT16_C(0x4800);
+	const uint32_t eight_f32 = UINT32_C(0x41000000);
+	EXPECT_EQ(eight_f32, fp16_alt_to_fp32_bits(eight_f16))
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F16 = 0x" << std::setw(4) << eight_f16 << ", "
+		<< "F32(F16) = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(eight_f16) << ", "
+		<< "F32 = 0x" << std::setw(8) << eight_f32;
+
+	const uint16_t sixteen_f16 = UINT16_C(0x4C00);
+	const uint32_t sixteen_f32 = UINT32_C(0x41800000);
+	EXPECT_EQ(sixteen_f32, fp16_alt_to_fp32_bits(sixteen_f16))
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F16 = 0x" << std::setw(4) << sixteen_f16 << ", "
+		<< "F32(F16) = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(sixteen_f16) << ", "
+		<< "F32 = 0x" << std::setw(8) << sixteen_f32;
+
+	const uint16_t thirtytwo_f16 = UINT16_C(0x5000);
+	const uint32_t thirtytwo_f32 = UINT32_C(0x42000000);
+	EXPECT_EQ(thirtytwo_f32, fp16_alt_to_fp32_bits(thirtytwo_f16))
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F16 = 0x" << std::setw(4) << thirtytwo_f16 << ", "
+		<< "F32(F16) = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(thirtytwo_f16) << ", "
+		<< "F32 = 0x" << std::setw(8) << thirtytwo_f32;
+
+	const uint16_t sixtyfour_f16 = UINT16_C(0x5400);
+	const uint32_t sixtyfour_f32 = UINT32_C(0x42800000);
+	EXPECT_EQ(sixtyfour_f32, fp16_alt_to_fp32_bits(sixtyfour_f16))
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F16 = 0x" << std::setw(4) << sixtyfour_f16 << ", "
+		<< "F32(F16) = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(sixtyfour_f16) << ", "
+		<< "F32 = 0x" << std::setw(8) << sixtyfour_f32;
+
+	const uint16_t max_po2_f16 = UINT16_C(0x7C00);
+	const uint32_t max_po2_f32 = UINT32_C(0x47800000);
+	EXPECT_EQ(max_po2_f32, fp16_alt_to_fp32_bits(max_po2_f16))
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F16 = 0x" << std::setw(4) << max_po2_f16 << ", "
+		<< "F32(F16) = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(max_po2_f16) << ", "
+		<< "F32 = 0x" << std::setw(8) << max_po2_f32;
+}
+
+TEST(FP16_ALT_TO_FP32_BITS, denormalized_powers_of_2) {
+	/* Half-precision inputs with a zero exponent field and a single mantissa bit set: 2**-15 down to 2**-24. */
+	/* Each must convert to the normalized single-precision bit pattern of the same power of two. */
+	const uint16_t exp2_minus_15_f16 = UINT16_C(0x0200);
+	const uint32_t exp2_minus_15_f32 = UINT32_C(0x38000000);
+	EXPECT_EQ(exp2_minus_15_f32, fp16_alt_to_fp32_bits(exp2_minus_15_f16))
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F16 = 0x" << std::setw(4) << exp2_minus_15_f16 << ", "
+		<< "F32(F16) = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(exp2_minus_15_f16) << ", "
+		<< "F32 = 0x" << std::setw(8) << exp2_minus_15_f32;
+
+	const uint16_t exp2_minus_16_f16 = UINT16_C(0x0100);
+	const uint32_t exp2_minus_16_f32 = UINT32_C(0x37800000);
+	EXPECT_EQ(exp2_minus_16_f32, fp16_alt_to_fp32_bits(exp2_minus_16_f16))
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F16 = 0x" << std::setw(4) << exp2_minus_16_f16 << ", "
+		<< "F32(F16) = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(exp2_minus_16_f16) << ", "
+		<< "F32 = 0x" << std::setw(8) << exp2_minus_16_f32;
+
+	const uint16_t exp2_minus_17_f16 = UINT16_C(0x0080);
+	const uint32_t exp2_minus_17_f32 = UINT32_C(0x37000000);
+	EXPECT_EQ(exp2_minus_17_f32, fp16_alt_to_fp32_bits(exp2_minus_17_f16))
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F16 = 0x" << std::setw(4) << exp2_minus_17_f16 << ", "
+		<< "F32(F16) = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(exp2_minus_17_f16) << ", "
+		<< "F32 = 0x" << std::setw(8) << exp2_minus_17_f32;
+
+	const uint16_t exp2_minus_18_f16 = UINT16_C(0x0040);
+	const uint32_t exp2_minus_18_f32 = UINT32_C(0x36800000);
+	EXPECT_EQ(exp2_minus_18_f32, fp16_alt_to_fp32_bits(exp2_minus_18_f16))
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F16 = 0x" << std::setw(4) << exp2_minus_18_f16 << ", "
+		<< "F32(F16) = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(exp2_minus_18_f16) << ", "
+		<< "F32 = 0x" << std::setw(8) << exp2_minus_18_f32;
+
+	const uint16_t exp2_minus_19_f16 = UINT16_C(0x0020);
+	const uint32_t exp2_minus_19_f32 = UINT32_C(0x36000000);
+	EXPECT_EQ(exp2_minus_19_f32, fp16_alt_to_fp32_bits(exp2_minus_19_f16))
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F16 = 0x" << std::setw(4) << exp2_minus_19_f16 << ", "
+		<< "F32(F16) = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(exp2_minus_19_f16) << ", "
+		<< "F32 = 0x" << std::setw(8) << exp2_minus_19_f32;
+
+	const uint16_t exp2_minus_20_f16 = UINT16_C(0x0010);
+	const uint32_t exp2_minus_20_f32 = UINT32_C(0x35800000);
+	EXPECT_EQ(exp2_minus_20_f32, fp16_alt_to_fp32_bits(exp2_minus_20_f16))
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F16 = 0x" << std::setw(4) << exp2_minus_20_f16 << ", "
+		<< "F32(F16) = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(exp2_minus_20_f16) << ", "
+		<< "F32 = 0x" << std::setw(8) << exp2_minus_20_f32;
+
+	const uint16_t exp2_minus_21_f16 = UINT16_C(0x0008);
+	const uint32_t exp2_minus_21_f32 = UINT32_C(0x35000000);
+	EXPECT_EQ(exp2_minus_21_f32, fp16_alt_to_fp32_bits(exp2_minus_21_f16))
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F16 = 0x" << std::setw(4) << exp2_minus_21_f16 << ", "
+		<< "F32(F16) = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(exp2_minus_21_f16) << ", "
+		<< "F32 = 0x" << std::setw(8) << exp2_minus_21_f32;
+
+	const uint16_t exp2_minus_22_f16 = UINT16_C(0x0004);
+	const uint32_t exp2_minus_22_f32 = UINT32_C(0x34800000);
+	EXPECT_EQ(exp2_minus_22_f32, fp16_alt_to_fp32_bits(exp2_minus_22_f16))
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F16 = 0x" << std::setw(4) << exp2_minus_22_f16 << ", "
+		<< "F32(F16) = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(exp2_minus_22_f16) << ", "
+		<< "F32 = 0x" << std::setw(8) << exp2_minus_22_f32;
+
+	const uint16_t exp2_minus_23_f16 = UINT16_C(0x0002);
+	const uint32_t exp2_minus_23_f32 = UINT32_C(0x34000000);
+	EXPECT_EQ(exp2_minus_23_f32, fp16_alt_to_fp32_bits(exp2_minus_23_f16))
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F16 = 0x" << std::setw(4) << exp2_minus_23_f16 << ", "
+		<< "F32(F16) = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(exp2_minus_23_f16) << ", "
+		<< "F32 = 0x" << std::setw(8) << exp2_minus_23_f32;
+
+	const uint16_t exp2_minus_24_f16 = UINT16_C(0x0001);
+	const uint32_t exp2_minus_24_f32 = UINT32_C(0x33800000);
+	EXPECT_EQ(exp2_minus_24_f32, fp16_alt_to_fp32_bits(exp2_minus_24_f16))
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F16 = 0x" << std::setw(4) << exp2_minus_24_f16 << ", "
+		<< "F32(F16) = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(exp2_minus_24_f16) << ", "
+		<< "F32 = 0x" << std::setw(8) << exp2_minus_24_f32;
+}
+
+TEST(FP16_ALT_TO_FP32_BITS, zero) {
+	/* +0.0: all bits clear in both formats. */
+	const uint16_t positive_zero_f16 = UINT16_C(0x0000);
+	const uint32_t positive_zero_f32 = UINT32_C(0x00000000);
+	EXPECT_EQ(positive_zero_f32, fp16_alt_to_fp32_bits(positive_zero_f16))
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F16 = 0x" << std::setw(4) << positive_zero_f16 << ", "
+		<< "F32(F16) = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(positive_zero_f16) << ", "
+		<< "F32 = 0x" << std::setw(8) << positive_zero_f32;
+
+	/* -0.0: only the top (sign) bit set, which must carry over to bit 31. */
+	const uint16_t negative_zero_f16 = UINT16_C(0x8000);
+	const uint32_t negative_zero_f32 = UINT32_C(0x80000000);
+	EXPECT_EQ(negative_zero_f32, fp16_alt_to_fp32_bits(negative_zero_f16))
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F16 = 0x" << std::setw(4) << negative_zero_f16 << ", "
+		<< "F32(F16) = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(negative_zero_f16) << ", "
+		<< "F32 = 0x" << std::setw(8) << negative_zero_f32;
+}
+
+TEST(FP16_ALT_TO_FP32_BITS, positive_normalized_values) {
+	/* Walk biased exponent fields 1..31 (unbiased -14..16) and all 1024 mantissas; the expected bits come from fp16::normalizedValues (tables.h) shifted by the exponent. */
+	for (uint32_t biased = 1; biased <= 31; ++biased) {
+		for (uint32_t mantissa = 0; mantissa != UINT32_C(0x0400); ++mantissa) {
+			const uint16_t fp16 = static_cast<uint16_t>((biased << 10) + mantissa);
+			const uint32_t fp32 = fp16::normalizedValues[mantissa] + ((biased - 15) << 23);
+			EXPECT_EQ(fp32, fp16_alt_to_fp32_bits(fp16))
+				<< std::hex << std::uppercase << std::setfill('0')
+				<< "F16 = 0x" << std::setw(4) << fp16 << ", "
+				<< "F32(F16) = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16) << ", "
+				<< "F32 = 0x" << std::setw(8) << fp32;
+		}
+	}
+}
+
+TEST(FP16_ALT_TO_FP32_BITS, negative_normalized_values) {
+	/* Same sweep as the positive case (biased exponents 1..31, all mantissas) with the sign bit flipped on both the input and the expected output. */
+	for (uint32_t biased = 1; biased <= 31; ++biased) {
+		for (uint32_t mantissa = 0; mantissa != UINT32_C(0x0400); ++mantissa) {
+			const uint16_t fp16 = static_cast<uint16_t>(((biased << 10) + mantissa) ^ UINT32_C(0x8000));
+			const uint32_t fp32 = (fp16::normalizedValues[mantissa] + ((biased - 15) << 23)) ^ UINT32_C(0x80000000);
+			EXPECT_EQ(fp32, fp16_alt_to_fp32_bits(fp16))
+				<< std::hex << std::uppercase << std::setfill('0')
+				<< "F16 = 0x" << std::setw(4) << fp16 << ", "
+				<< "F32(F16) = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16) << ", "
+				<< "F32 = 0x" << std::setw(8) << fp32;
+		}
+	}
+}
+
+TEST(FP16_ALT_TO_FP32_BITS, positive_denormalized_values) {
+	/* Inputs 0x0000..0x03FF have a zero exponent field; expected outputs are looked up in fp16::denormalizedValues (tables.h). */
+	for (uint16_t h = UINT16_C(0); h != UINT16_C(0x0400); ++h) {
+		EXPECT_EQ(fp16::denormalizedValues[h], fp16_alt_to_fp32_bits(h)) << std::hex << std::uppercase << std::setfill('0')
+			<< "F16 = 0x" << std::setw(4) << h << ", "
+			<< "F32(F16) = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(h) << ", "
+			<< "F32 = 0x" << std::setw(8) << fp16::denormalizedValues[h];
+	}
+}
+
+TEST(FP16_ALT_TO_FP32_BITS, negative_denormalized_values) {
+	/* Mirror of the positive denormal sweep: the sign bit is flipped on the input and on the table-provided expectation. */
+	for (uint16_t mantissa = UINT16_C(0); mantissa != UINT16_C(0x0400); ++mantissa) {
+		const uint16_t fp16 = UINT16_C(0x8000) ^ mantissa;
+		const uint32_t fp32 = UINT32_C(0x80000000) ^ fp16::denormalizedValues[mantissa];
+		EXPECT_EQ(fp32, fp16_alt_to_fp32_bits(fp16)) << std::hex << std::uppercase << std::setfill('0')
+			<< "F16 = 0x" << std::setw(4) << fp16 << ", "
+			<< "F32(F16) = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16) << ", "
+			<< "F32 = 0x" << std::setw(8) << fp32;
+	}
+}
diff --git a/test/alt-to-fp32-psimd.cc b/test/alt-to-fp32-psimd.cc
new file mode 100644
index 0000000..c492b36
--- /dev/null
+++ b/test/alt-to-fp32-psimd.cc
@@ -0,0 +1,145 @@
+#include <gtest/gtest.h>
+
+#include <cstdint>
+
+#include <fp16.h>
+#include <fp16/psimd.h>
+
+
+TEST(FP16_ALT_TO_FP32_PSIMD, positive_normalized_values) {
+	/* For biased exponent fields 1..31, convert four consecutive halves per call and compare lanes 0..3 with the scalar fp16_alt_to_fp32_bits result. */
+	for (uint32_t biased = 1; biased <= 31; ++biased) {
+		for (uint16_t h = 0; h < 0x0400; h += 4) {
+			const uint32_t first = (biased << 10) + h;
+			const psimd_u16 fp16 = {
+				static_cast<uint16_t>(first + 0),
+				static_cast<uint16_t>(first + 1),
+				static_cast<uint16_t>(first + 2),
+				static_cast<uint16_t>(first + 3),
+			};
+			const psimd_u32 fp32 = (psimd_u32) fp16_alt_to_fp32_psimd(fp16);
+			EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[0]), fp32[0])
+				<< std::hex << std::uppercase << std::setfill('0')
+				<< "F16 = 0x" << std::setw(4) << fp16[0] << ", "
+				<< "F32(F16) = 0x" << std::setw(8) << fp32[0] << ", "
+				<< "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[0]);
+			EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[1]), fp32[1])
+				<< std::hex << std::uppercase << std::setfill('0')
+				<< "F16 = 0x" << std::setw(4) << fp16[1] << ", "
+				<< "F32(F16) = 0x" << std::setw(8) << fp32[1] << ", "
+				<< "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[1]);
+			EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[2]), fp32[2])
+				<< std::hex << std::uppercase << std::setfill('0')
+				<< "F16 = 0x" << std::setw(4) << fp16[2] << ", "
+				<< "F32(F16) = 0x" << std::setw(8) << fp32[2] << ", "
+				<< "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[2]);
+			EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[3]), fp32[3])
+				<< std::hex << std::uppercase << std::setfill('0')
+				<< "F16 = 0x" << std::setw(4) << fp16[3] << ", "
+				<< "F32(F16) = 0x" << std::setw(8) << fp32[3] << ", "
+				<< "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[3]);
+		}
+	}
+}
+
+TEST(FP16_ALT_TO_FP32_PSIMD, negative_normalized_values) {
+	/* Negative counterpart: 0x8000 is added to every input before the four-lane conversion is compared with the scalar result. */
+	for (uint32_t biased = 1; biased <= 31; ++biased) {
+		for (uint16_t h = 0; h < 0x0400; h += 4) {
+			const uint32_t first = UINT32_C(0x8000) + (biased << 10) + h;
+			const psimd_u16 fp16 = {
+				static_cast<uint16_t>(first + 0),
+				static_cast<uint16_t>(first + 1),
+				static_cast<uint16_t>(first + 2),
+				static_cast<uint16_t>(first + 3),
+			};
+			const psimd_u32 fp32 = (psimd_u32) fp16_alt_to_fp32_psimd(fp16);
+			EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[0]), fp32[0])
+				<< std::hex << std::uppercase << std::setfill('0')
+				<< "F16 = 0x" << std::setw(4) << fp16[0] << ", "
+				<< "F32(F16) = 0x" << std::setw(8) << fp32[0] << ", "
+				<< "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[0]);
+			EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[1]), fp32[1])
+				<< std::hex << std::uppercase << std::setfill('0')
+				<< "F16 = 0x" << std::setw(4) << fp16[1] << ", "
+				<< "F32(F16) = 0x" << std::setw(8) << fp32[1] << ", "
+				<< "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[1]);
+			EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[2]), fp32[2])
+				<< std::hex << std::uppercase << std::setfill('0')
+				<< "F16 = 0x" << std::setw(4) << fp16[2] << ", "
+				<< "F32(F16) = 0x" << std::setw(8) << fp32[2] << ", "
+				<< "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[2]);
+			EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[3]), fp32[3])
+				<< std::hex << std::uppercase << std::setfill('0')
+				<< "F16 = 0x" << std::setw(4) << fp16[3] << ", "
+				<< "F32(F16) = 0x" << std::setw(8) << fp32[3] << ", "
+				<< "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[3]);
+		}
+	}
+}
+
+TEST(FP16_ALT_TO_FP32_PSIMD, positive_denormalized_values) {
+	/* Inputs 0x0000..0x03FF (zero exponent field), four at a time; lanes 0..3 must match the scalar conversion. */
+	for (uint16_t h = 0; h < 0x0400; h += 4) {
+		const psimd_u16 fp16 = {
+			static_cast<uint16_t>(h + 0),
+			static_cast<uint16_t>(h + 1),
+			static_cast<uint16_t>(h + 2),
+			static_cast<uint16_t>(h + 3),
+		};
+		const psimd_u32 fp32 = (psimd_u32) fp16_alt_to_fp32_psimd(fp16);
+		EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[0]), fp32[0])
+			<< std::hex << std::uppercase << std::setfill('0')
+			<< "F16 = 0x" << std::setw(4) << fp16[0] << ", "
+			<< "F32(F16) = 0x" << std::setw(8) << fp32[0] << ", "
+			<< "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[0]);
+		EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[1]), fp32[1])
+			<< std::hex << std::uppercase << std::setfill('0')
+			<< "F16 = 0x" << std::setw(4) << fp16[1] << ", "
+			<< "F32(F16) = 0x" << std::setw(8) << fp32[1] << ", "
+			<< "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[1]);
+		EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[2]), fp32[2])
+			<< std::hex << std::uppercase << std::setfill('0')
+			<< "F16 = 0x" << std::setw(4) << fp16[2] << ", "
+			<< "F32(F16) = 0x" << std::setw(8) << fp32[2] << ", "
+			<< "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[2]);
+		EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[3]), fp32[3])
+			<< std::hex << std::uppercase << std::setfill('0')
+			<< "F16 = 0x" << std::setw(4) << fp16[3] << ", "
+			<< "F32(F16) = 0x" << std::setw(8) << fp32[3] << ", "
+			<< "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[3]);
+	}
+}
+
+TEST(FP16_ALT_TO_FP32_PSIMD, negative_denormalized_values) {
+	/* Inputs 0x8000..0x83FF (sign bit set, zero exponent field), four at a time; lanes 0..3 must match the scalar conversion. */
+	for (uint16_t h = 0; h < 0x0400; h += 4) {
+		const psimd_u16 fp16 = {
+			static_cast<uint16_t>(0x8000 + h + 0),
+			static_cast<uint16_t>(0x8000 + h + 1),
+			static_cast<uint16_t>(0x8000 + h + 2),
+			static_cast<uint16_t>(0x8000 + h + 3),
+		};
+		const psimd_u32 fp32 = (psimd_u32) fp16_alt_to_fp32_psimd(fp16);
+		EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[0]), fp32[0])
+			<< std::hex << std::uppercase << std::setfill('0')
+			<< "F16 = 0x" << std::setw(4) << fp16[0] << ", "
+			<< "F32(F16) = 0x" << std::setw(8) << fp32[0] << ", "
+			<< "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[0]);
+		EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[1]), fp32[1])
+			<< std::hex << std::uppercase << std::setfill('0')
+			<< "F16 = 0x" << std::setw(4) << fp16[1] << ", "
+			<< "F32(F16) = 0x" << std::setw(8) << fp32[1] << ", "
+			<< "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[1]);
+		EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[2]), fp32[2])
+			<< std::hex << std::uppercase << std::setfill('0')
+			<< "F16 = 0x" << std::setw(4) << fp16[2] << ", "
+			<< "F32(F16) = 0x" << std::setw(8) << fp32[2] << ", "
+			<< "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[2]);
+		EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[3]), fp32[3])
+			<< std::hex << std::uppercase << std::setfill('0')
+			<< "F16 = 0x" << std::setw(4) << fp16[3] << ", "
+			<< "F32(F16) = 0x" << std::setw(8) << fp32[3] << ", "
+			<< "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[3]);
+	}
+}
diff --git a/test/alt-to-fp32-value.cc b/test/alt-to-fp32-value.cc
new file mode 100644
index 0000000..6277576
--- /dev/null
+++ b/test/alt-to-fp32-value.cc
@@ -0,0 +1,348 @@
+#include <gtest/gtest.h>
+
+#include <cstdint>
+#include <cmath>
+
+#include <fp16.h>
+#include "tables.h"
+
+
+TEST(FP16_ALT_TO_FP32_VALUE, normalized_powers_of_2) {
+	/* Each case converts a half-precision bit pattern to float, copies the float's bits out with memcpy, and compares them with the expected pattern. */
+	/* The last case expects 0x7C00 (exponent field 31) to convert to 2**16, i.e. a finite value rather than infinity. */
+	const uint16_t min_po2_f16 = UINT16_C(0x0400);
+	const uint32_t min_po2_f32 = UINT32_C(0x38800000);
+	const float min_po2_value = fp16_alt_to_fp32_value(min_po2_f16);
+	uint32_t min_po2_bits;
+	memcpy(&min_po2_bits, &min_po2_value, sizeof(min_po2_bits));
+	EXPECT_EQ(min_po2_f32, min_po2_bits)
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F16 = 0x" << std::setw(4) << min_po2_f16 << ", "
+		<< "F32(F16) = 0x" << std::setw(8) << min_po2_bits << ", "
+		<< "F32 = 0x" << std::setw(8) << min_po2_f32;
+
+	const uint16_t eighths_f16 = UINT16_C(0x3000);
+	const uint32_t eighths_f32 = UINT32_C(0x3E000000);
+	const float eighths_value = fp16_alt_to_fp32_value(eighths_f16);
+	uint32_t eighths_bits;
+	memcpy(&eighths_bits, &eighths_value, sizeof(eighths_bits));
+	EXPECT_EQ(eighths_f32, eighths_bits)
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F16 = 0x" << std::setw(4) << eighths_f16 << ", "
+		<< "F32(F16) = 0x" << std::setw(8) << eighths_bits << ", "
+		<< "F32 = 0x" << std::setw(8) << eighths_f32;
+
+	const uint16_t quarter_f16 = UINT16_C(0x3400);
+	const uint32_t quarter_f32 = UINT32_C(0x3E800000);
+	const float quarter_value = fp16_alt_to_fp32_value(quarter_f16);
+	uint32_t quarter_bits;
+	memcpy(&quarter_bits, &quarter_value, sizeof(quarter_bits));
+	EXPECT_EQ(quarter_f32, quarter_bits)
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F16 = 0x" << std::setw(4) << quarter_f16 << ", "
+		<< "F32(F16) = 0x" << std::setw(8) << quarter_bits << ", "
+		<< "F32 = 0x" << std::setw(8) << quarter_f32;
+
+	const uint16_t half_f16 = UINT16_C(0x3800);
+	const uint32_t half_f32 = UINT32_C(0x3F000000);
+	const float half_value = fp16_alt_to_fp32_value(half_f16);
+	uint32_t half_bits;
+	memcpy(&half_bits, &half_value, sizeof(half_bits));
+	EXPECT_EQ(half_f32, half_bits)
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F16 = 0x" << std::setw(4) << half_f16 << ", "
+		<< "F32(F16) = 0x" << std::setw(8) << half_bits << ", "
+		<< "F32 = 0x" << std::setw(8) << half_f32;
+
+	const uint16_t one_f16 = UINT16_C(0x3C00);
+	const uint32_t one_f32 = UINT32_C(0x3F800000);
+	const float one_value = fp16_alt_to_fp32_value(one_f16);
+	uint32_t one_bits;
+	memcpy(&one_bits, &one_value, sizeof(one_bits));
+	EXPECT_EQ(one_f32, one_bits)
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F16 = 0x" << std::setw(4) << one_f16 << ", "
+		<< "F32(F16) = 0x" << std::setw(8) << one_bits << ", "
+		<< "F32 = 0x" << std::setw(8) << one_f32;
+
+	const uint16_t two_f16 = UINT16_C(0x4000);
+	const uint32_t two_f32 = UINT32_C(0x40000000);
+	const float two_value = fp16_alt_to_fp32_value(two_f16);
+	uint32_t two_bits;
+	memcpy(&two_bits, &two_value, sizeof(two_bits));
+	EXPECT_EQ(two_f32, two_bits)
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F16 = 0x" << std::setw(4) << two_f16 << ", "
+		<< "F32(F16) = 0x" << std::setw(8) << two_bits << ", "
+		<< "F32 = 0x" << std::setw(8) << two_f32;
+
+	const uint16_t four_f16 = UINT16_C(0x4400);
+	const uint32_t four_f32 = UINT32_C(0x40800000);
+	const float four_value = fp16_alt_to_fp32_value(four_f16);
+	uint32_t four_bits;
+	memcpy(&four_bits, &four_value, sizeof(four_bits));
+	EXPECT_EQ(four_f32, four_bits)
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F16 = 0x" << std::setw(4) << four_f16 << ", "
+		<< "F32(F16) = 0x" << std::setw(8) << four_bits << ", "
+		<< "F32 = 0x" << std::setw(8) << four_f32;
+
+	const uint16_t eight_f16 = UINT16_C(0x4800);
+	const uint32_t eight_f32 = UINT32_C(0x41000000);
+	const float eight_value = fp16_alt_to_fp32_value(eight_f16);
+	uint32_t eight_bits;
+	memcpy(&eight_bits, &eight_value, sizeof(eight_bits));
+	EXPECT_EQ(eight_f32, eight_bits)
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F16 = 0x" << std::setw(4) << eight_f16 << ", "
+		<< "F32(F16) = 0x" << std::setw(8) << eight_bits << ", "
+		<< "F32 = 0x" << std::setw(8) << eight_f32;
+
+	const uint16_t sixteen_f16 = UINT16_C(0x4C00);
+	const uint32_t sixteen_f32 = UINT32_C(0x41800000);
+	const float sixteen_value = fp16_alt_to_fp32_value(sixteen_f16);
+	uint32_t sixteen_bits;
+	memcpy(&sixteen_bits, &sixteen_value, sizeof(sixteen_bits));
+	EXPECT_EQ(sixteen_f32, sixteen_bits)
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F16 = 0x" << std::setw(4) << sixteen_f16 << ", "
+		<< "F32(F16) = 0x" << std::setw(8) << sixteen_bits << ", "
+		<< "F32 = 0x" << std::setw(8) << sixteen_f32;
+
+	const uint16_t thirtytwo_f16 = UINT16_C(0x5000);
+	const uint32_t thirtytwo_f32 = UINT32_C(0x42000000);
+	const float thirtytwo_value = fp16_alt_to_fp32_value(thirtytwo_f16);
+	uint32_t thirtytwo_bits;
+	memcpy(&thirtytwo_bits, &thirtytwo_value, sizeof(thirtytwo_bits));
+	EXPECT_EQ(thirtytwo_f32, thirtytwo_bits)
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F16 = 0x" << std::setw(4) << thirtytwo_f16 << ", "
+		<< "F32(F16) = 0x" << std::setw(8) << thirtytwo_bits << ", "
+		<< "F32 = 0x" << std::setw(8) << thirtytwo_f32;
+
+	const uint16_t sixtyfour_f16 = UINT16_C(0x5400);
+	const uint32_t sixtyfour_f32 = UINT32_C(0x42800000);
+	const float sixtyfour_value = fp16_alt_to_fp32_value(sixtyfour_f16);
+	uint32_t sixtyfour_bits;
+	memcpy(&sixtyfour_bits, &sixtyfour_value, sizeof(sixtyfour_bits));
+	EXPECT_EQ(sixtyfour_f32, sixtyfour_bits)
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F16 = 0x" << std::setw(4) << sixtyfour_f16 << ", "
+		<< "F32(F16) = 0x" << std::setw(8) << sixtyfour_bits << ", "
+		<< "F32 = 0x" << std::setw(8) << sixtyfour_f32;
+
+	const uint16_t max_po2_f16 = UINT16_C(0x7C00);
+	const uint32_t max_po2_f32 = UINT32_C(0x47800000);
+	const float max_po2_value = fp16_alt_to_fp32_value(max_po2_f16);
+	uint32_t max_po2_bits;
+	memcpy(&max_po2_bits, &max_po2_value, sizeof(max_po2_bits));
+	EXPECT_EQ(max_po2_f32, max_po2_bits)
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F16 = 0x" << std::setw(4) << max_po2_f16 << ", "
+		<< "F32(F16) = 0x" << std::setw(8) << max_po2_bits << ", "
+		<< "F32 = 0x" << std::setw(8) << max_po2_f32;
+}
+
+TEST(FP16_ALT_TO_FP32_VALUE, denormalized_powers_of_2) {
+	/* Half-precision inputs with a zero exponent field and a single mantissa bit set: 2**-15 down to 2**-24. */
+	/* Each is converted to float; the float's bits (copied out with memcpy) must equal the expected single-precision pattern. */
+	const uint16_t exp2_minus_15_f16 = UINT16_C(0x0200);
+	const uint32_t exp2_minus_15_f32 = UINT32_C(0x38000000);
+	const float exp2_minus_15_value = fp16_alt_to_fp32_value(exp2_minus_15_f16);
+	uint32_t exp2_minus_15_bits;
+	memcpy(&exp2_minus_15_bits, &exp2_minus_15_value, sizeof(exp2_minus_15_bits));
+	EXPECT_EQ(exp2_minus_15_f32, exp2_minus_15_bits)
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F16 = 0x" << std::setw(4) << exp2_minus_15_f16 << ", "
+		<< "F32(F16) = 0x" << std::setw(8) << exp2_minus_15_bits << ", "
+		<< "F32 = 0x" << std::setw(8) << exp2_minus_15_f32;
+
+	const uint16_t exp2_minus_16_f16 = UINT16_C(0x0100);
+	const uint32_t exp2_minus_16_f32 = UINT32_C(0x37800000);
+	const float exp2_minus_16_value = fp16_alt_to_fp32_value(exp2_minus_16_f16);
+	uint32_t exp2_minus_16_bits;
+	memcpy(&exp2_minus_16_bits, &exp2_minus_16_value, sizeof(exp2_minus_16_bits));
+	EXPECT_EQ(exp2_minus_16_f32, exp2_minus_16_bits)
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F16 = 0x" << std::setw(4) << exp2_minus_16_f16 << ", "
+		<< "F32(F16) = 0x" << std::setw(8) << exp2_minus_16_bits << ", "
+		<< "F32 = 0x" << std::setw(8) << exp2_minus_16_f32;
+
+	const uint16_t exp2_minus_17_f16 = UINT16_C(0x0080);
+	const uint32_t exp2_minus_17_f32 = UINT32_C(0x37000000);
+	const float exp2_minus_17_value = fp16_alt_to_fp32_value(exp2_minus_17_f16);
+	uint32_t exp2_minus_17_bits;
+	memcpy(&exp2_minus_17_bits, &exp2_minus_17_value, sizeof(exp2_minus_17_bits));
+	EXPECT_EQ(exp2_minus_17_f32, exp2_minus_17_bits)
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F16 = 0x" << std::setw(4) << exp2_minus_17_f16 << ", "
+		<< "F32(F16) = 0x" << std::setw(8) << exp2_minus_17_bits << ", "
+		<< "F32 = 0x" << std::setw(8) << exp2_minus_17_f32;
+
+	const uint16_t exp2_minus_18_f16 = UINT16_C(0x0040);
+	const uint32_t exp2_minus_18_f32 = UINT32_C(0x36800000);
+	const float exp2_minus_18_value = fp16_alt_to_fp32_value(exp2_minus_18_f16);
+	uint32_t exp2_minus_18_bits;
+	memcpy(&exp2_minus_18_bits, &exp2_minus_18_value, sizeof(exp2_minus_18_bits));
+	EXPECT_EQ(exp2_minus_18_f32, exp2_minus_18_bits)
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F16 = 0x" << std::setw(4) << exp2_minus_18_f16 << ", "
+		<< "F32(F16) = 0x" << std::setw(8) << exp2_minus_18_bits << ", "
+		<< "F32 = 0x" << std::setw(8) << exp2_minus_18_f32;
+
+	const uint16_t exp2_minus_19_f16 = UINT16_C(0x0020);
+	const uint32_t exp2_minus_19_f32 = UINT32_C(0x36000000);
+	const float exp2_minus_19_value = fp16_alt_to_fp32_value(exp2_minus_19_f16);
+	uint32_t exp2_minus_19_bits;
+	memcpy(&exp2_minus_19_bits, &exp2_minus_19_value, sizeof(exp2_minus_19_bits));
+	EXPECT_EQ(exp2_minus_19_f32, exp2_minus_19_bits)
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F16 = 0x" << std::setw(4) << exp2_minus_19_f16 << ", "
+		<< "F32(F16) = 0x" << std::setw(8) << exp2_minus_19_bits << ", "
+		<< "F32 = 0x" << std::setw(8) << exp2_minus_19_f32;
+
+	const uint16_t exp2_minus_20_f16 = UINT16_C(0x0010);
+	const uint32_t exp2_minus_20_f32 = UINT32_C(0x35800000);
+	const float exp2_minus_20_value = fp16_alt_to_fp32_value(exp2_minus_20_f16);
+	uint32_t exp2_minus_20_bits;
+	memcpy(&exp2_minus_20_bits, &exp2_minus_20_value, sizeof(exp2_minus_20_bits));
+	EXPECT_EQ(exp2_minus_20_f32, exp2_minus_20_bits)
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F16 = 0x" << std::setw(4) << exp2_minus_20_f16 << ", "
+		<< "F32(F16) = 0x" << std::setw(8) << exp2_minus_20_bits << ", "
+		<< "F32 = 0x" << std::setw(8) << exp2_minus_20_f32;
+
+	const uint16_t exp2_minus_21_f16 = UINT16_C(0x0008);
+	const uint32_t exp2_minus_21_f32 = UINT32_C(0x35000000);
+	const float exp2_minus_21_value = fp16_alt_to_fp32_value(exp2_minus_21_f16);
+	uint32_t exp2_minus_21_bits;
+	memcpy(&exp2_minus_21_bits, &exp2_minus_21_value, sizeof(exp2_minus_21_bits));
+	EXPECT_EQ(exp2_minus_21_f32, exp2_minus_21_bits)
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F16 = 0x" << std::setw(4) << exp2_minus_21_f16 << ", "
+		<< "F32(F16) = 0x" << std::setw(8) << exp2_minus_21_bits << ", "
+		<< "F32 = 0x" << std::setw(8) << exp2_minus_21_f32;
+
+	const uint16_t exp2_minus_22_f16 = UINT16_C(0x0004);
+	const uint32_t exp2_minus_22_f32 = UINT32_C(0x34800000);
+	const float exp2_minus_22_value = fp16_alt_to_fp32_value(exp2_minus_22_f16);
+	uint32_t exp2_minus_22_bits;
+	memcpy(&exp2_minus_22_bits, &exp2_minus_22_value, sizeof(exp2_minus_22_bits));
+	EXPECT_EQ(exp2_minus_22_f32, exp2_minus_22_bits)
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F16 = 0x" << std::setw(4) << exp2_minus_22_f16 << ", "
+		<< "F32(F16) = 0x" << std::setw(8) << exp2_minus_22_bits << ", "
+		<< "F32 = 0x" << std::setw(8) << exp2_minus_22_f32;
+
+	const uint16_t exp2_minus_23_f16 = UINT16_C(0x0002);
+	const uint32_t exp2_minus_23_f32 = UINT32_C(0x34000000);
+	const float exp2_minus_23_value = fp16_alt_to_fp32_value(exp2_minus_23_f16);
+	uint32_t exp2_minus_23_bits;
+	memcpy(&exp2_minus_23_bits, &exp2_minus_23_value, sizeof(exp2_minus_23_bits));
+	EXPECT_EQ(exp2_minus_23_f32, exp2_minus_23_bits)
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F16 = 0x" << std::setw(4) << exp2_minus_23_f16 << ", "
+		<< "F32(F16) = 0x" << std::setw(8) << exp2_minus_23_bits << ", "
+		<< "F32 = 0x" << std::setw(8) << exp2_minus_23_f32;
+
+	const uint16_t exp2_minus_24_f16 = UINT16_C(0x0001);
+	const uint32_t exp2_minus_24_f32 = UINT32_C(0x33800000);
+	const float exp2_minus_24_value = fp16_alt_to_fp32_value(exp2_minus_24_f16);
+	uint32_t exp2_minus_24_bits;
+	memcpy(&exp2_minus_24_bits, &exp2_minus_24_value, sizeof(exp2_minus_24_bits));
+	EXPECT_EQ(exp2_minus_24_f32, exp2_minus_24_bits)
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F16 = 0x" << std::setw(4) << exp2_minus_24_f16 << ", "
+		<< "F32(F16) = 0x" << std::setw(8) << exp2_minus_24_bits << ", "
+		<< "F32 = 0x" << std::setw(8) << exp2_minus_24_f32;
+}
+
+TEST(FP16_ALT_TO_FP32_VALUE, zero) {
+	/* +0.0: all bits clear in both formats; the float result is compared bitwise via memcpy. */
+	const uint16_t positive_zero_f16 = UINT16_C(0x0000);
+	const uint32_t positive_zero_f32 = UINT32_C(0x00000000);
+	const float positive_zero_value = fp16_alt_to_fp32_value(positive_zero_f16);
+	uint32_t positive_zero_bits;
+	memcpy(&positive_zero_bits, &positive_zero_value, sizeof(positive_zero_bits));
+	EXPECT_EQ(positive_zero_f32, positive_zero_bits)
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F16 = 0x" << std::setw(4) << positive_zero_f16 << ", "
+		<< "F32(F16) = 0x" << std::setw(8) << positive_zero_bits << ", "
+		<< "F32 = 0x" << std::setw(8) << positive_zero_f32;
+
+	/* -0.0: only the top (sign) bit set; a bitwise comparison is what distinguishes it from +0.0. */
+	const uint16_t negative_zero_f16 = UINT16_C(0x8000);
+	const uint32_t negative_zero_f32 = UINT32_C(0x80000000);
+	const float negative_zero_value = fp16_alt_to_fp32_value(negative_zero_f16);
+	uint32_t negative_zero_bits;
+	memcpy(&negative_zero_bits, &negative_zero_value, sizeof(negative_zero_bits));
+	EXPECT_EQ(negative_zero_f32, negative_zero_bits)
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F16 = 0x" << std::setw(4) << negative_zero_f16 << ", "
+		<< "F32(F16) = 0x" << std::setw(8) << negative_zero_bits << ", "
+		<< "F32 = 0x" << std::setw(8) << negative_zero_f32;
+}
+
+TEST(FP16_ALT_TO_FP32_VALUE, positive_normalized_values) {
+	/* Walk biased exponent fields 1..31 (unbiased -14..16) and all 1024 mantissas; the float result is compared bitwise against fp16::normalizedValues (tables.h) shifted by the exponent. */
+	for (uint32_t biased = 1; biased <= 31; ++biased) {
+		for (uint32_t mantissa = 0; mantissa != UINT32_C(0x0400); ++mantissa) {
+			const uint16_t half_bits = static_cast<uint16_t>((biased << 10) + mantissa);
+			const uint32_t fp32 = fp16::normalizedValues[mantissa] + ((biased - 15) << 23);
+			const float converted = fp16_alt_to_fp32_value(half_bits);
+			uint32_t bits;
+			memcpy(&bits, &converted, sizeof(bits));
+			EXPECT_EQ(fp32, bits)
+				<< std::hex << std::uppercase << std::setfill('0')
+				<< "F16 = 0x" << std::setw(4) << half_bits << ", "
+				<< "F32(F16) = 0x" << std::setw(8) << bits << ", "
+				<< "F32 = 0x" << std::setw(8) << fp32;
+		}
+	}
+}
+
+TEST(FP16_ALT_TO_FP32_VALUE, negative_normalized_values) {
+	/* Same sweep as the positive case (biased exponents 1..31, all mantissas) with the sign bit flipped on both the input and the expected bits. */
+	for (uint32_t biased = 1; biased <= 31; ++biased) {
+		for (uint32_t mantissa = 0; mantissa != UINT32_C(0x0400); ++mantissa) {
+			const uint16_t half_bits = static_cast<uint16_t>(((biased << 10) + mantissa) ^ UINT32_C(0x8000));
+			const uint32_t fp32 = (fp16::normalizedValues[mantissa] + ((biased - 15) << 23)) ^ UINT32_C(0x80000000);
+			const float converted = fp16_alt_to_fp32_value(half_bits);
+			uint32_t bits;
+			memcpy(&bits, &converted, sizeof(bits));
+			EXPECT_EQ(fp32, bits)
+				<< std::hex << std::uppercase << std::setfill('0')
+				<< "F16 = 0x" << std::setw(4) << half_bits << ", "
+				<< "F32(F16) = 0x" << std::setw(8) << bits << ", "
+				<< "F32 = 0x" << std::setw(8) << fp32;
+		}
+	}
+}
+
+TEST(FP16_ALT_TO_FP32_VALUE, positive_denormalized_values) {
+	/* Inputs 0x0000..0x03FF have a zero exponent field; the float result is compared bitwise with fp16::denormalizedValues (tables.h). */
+	for (uint16_t h = UINT16_C(0); h != UINT16_C(0x0400); ++h) {
+		const float converted = fp16_alt_to_fp32_value(h);
+		uint32_t bits;
+		memcpy(&bits, &converted, sizeof(bits));
+		EXPECT_EQ(fp16::denormalizedValues[h], bits) << std::hex << std::uppercase << std::setfill('0')
+			<< "F16 = 0x" << std::setw(4) << h << ", "
+			<< "F32(F16) = 0x" << std::setw(8) << bits << ", "
+			<< "F32 = 0x" << std::setw(8) << fp16::denormalizedValues[h];
+	}
+}
+
+TEST(FP16_ALT_TO_FP32_VALUE, negative_denormalized_values) {
+	/* Mirror of the positive denormal sweep: the sign bit is flipped on the input and on the table-provided expectation. */
+	for (uint16_t mantissa = UINT16_C(0); mantissa != UINT16_C(0x0400); ++mantissa) {
+		const uint16_t half_bits = UINT16_C(0x8000) ^ mantissa;
+		const uint32_t fp32 = UINT32_C(0x80000000) ^ fp16::denormalizedValues[mantissa];
+		const float converted = fp16_alt_to_fp32_value(half_bits);
+		uint32_t bits;
+		memcpy(&bits, &converted, sizeof(bits));
+		EXPECT_EQ(fp32, bits) << std::hex << std::uppercase << std::setfill('0')
+			<< "F16 = 0x" << std::setw(4) << half_bits << ", "
+			<< "F32(F16) = 0x" << std::setw(8) << bits << ", "
+			<< "F32 = 0x" << std::setw(8) << fp32;
+	}
+}
diff --git a/test/alt-to-fp32x2-psimd.cc b/test/alt-to-fp32x2-psimd.cc
new file mode 100644
index 0000000..b571006
--- /dev/null
+++ b/test/alt-to-fp32x2-psimd.cc
@@ -0,0 +1,245 @@
+#include <gtest/gtest.h>
+
+#include <cstdint>
+
+#include <fp16.h>
+#include <fp16/psimd.h>
+
+
+TEST(FP16_ALT_TO_FP32x2_PSIMD, positive_normalized_values) {
+	/* Exponent field values 1..31 (unbiased -14..16 with a bias of 15); each vector result is checked lane by lane against the scalar conversion. */
+	for (uint32_t biasedExponent = 1; biasedExponent <= 31; biasedExponent++) {
+		const uint32_t exponentBits = biasedExponent << 10;
+		for (uint16_t mantissa = 0; mantissa < 0x0400; mantissa += 8) {
+			const psimd_u16 fp16 = {
+				(uint16_t) (mantissa + exponentBits + 0),
+				(uint16_t) (mantissa + exponentBits + 1),
+				(uint16_t) (mantissa + exponentBits + 2),
+				(uint16_t) (mantissa + exponentBits + 3),
+				(uint16_t) (mantissa + exponentBits + 4),
+				(uint16_t) (mantissa + exponentBits + 5),
+				(uint16_t) (mantissa + exponentBits + 6),
+				(uint16_t) (mantissa + exponentBits + 7)
+			};
+			const psimd_u32x2 fp32 = psimd_cast_f32x2_u32x2(fp16_alt_to_fp32x2_psimd(fp16));
+
+			EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[0]), fp32.lo[0])
+				<< std::hex << std::uppercase << std::setfill('0')
+				<< "F16 = 0x" << std::setw(4) << fp16[0] << ", "
+				<< "F32(F16) = 0x" << std::setw(8) << fp32.lo[0] << ", "
+				<< "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[0]);
+			EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[1]), fp32.lo[1])
+				<< std::hex << std::uppercase << std::setfill('0')
+				<< "F16 = 0x" << std::setw(4) << fp16[1] << ", "
+				<< "F32(F16) = 0x" << std::setw(8) << fp32.lo[1] << ", "
+				<< "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[1]);
+			EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[2]), fp32.lo[2])
+				<< std::hex << std::uppercase << std::setfill('0')
+				<< "F16 = 0x" << std::setw(4) << fp16[2] << ", "
+				<< "F32(F16) = 0x" << std::setw(8) << fp32.lo[2] << ", "
+				<< "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[2]);
+			EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[3]), fp32.lo[3])
+				<< std::hex << std::uppercase << std::setfill('0')
+				<< "F16 = 0x" << std::setw(4) << fp16[3] << ", "
+				<< "F32(F16) = 0x" << std::setw(8) << fp32.lo[3] << ", "
+				<< "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[3]);
+			EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[4]), fp32.hi[0])
+				<< std::hex << std::uppercase << std::setfill('0')
+				<< "F16 = 0x" << std::setw(4) << fp16[4] << ", "
+				<< "F32(F16) = 0x" << std::setw(8) << fp32.hi[0] << ", "
+				<< "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[4]);
+			EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[5]), fp32.hi[1])
+				<< std::hex << std::uppercase << std::setfill('0')
+				<< "F16 = 0x" << std::setw(4) << fp16[5] << ", "
+				<< "F32(F16) = 0x" << std::setw(8) << fp32.hi[1] << ", "
+				<< "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[5]);
+			EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[6]), fp32.hi[2])
+				<< std::hex << std::uppercase << std::setfill('0')
+				<< "F16 = 0x" << std::setw(4) << fp16[6] << ", "
+				<< "F32(F16) = 0x" << std::setw(8) << fp32.hi[2] << ", "
+				<< "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[6]);
+			EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[7]), fp32.hi[3])
+				<< std::hex << std::uppercase << std::setfill('0')
+				<< "F16 = 0x" << std::setw(4) << fp16[7] << ", "
+				<< "F32(F16) = 0x" << std::setw(8) << fp32.hi[3] << ", "
+				<< "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[7]);
+		}
+	}
+}
+
+TEST(FP16_ALT_TO_FP32x2_PSIMD, negative_normalized_values) {
+	/* Same sweep as the positive case (exponent field 1..31) with the sign bit 0x8000 set on every lane. */
+	for (uint32_t biasedExponent = 1; biasedExponent <= 31; biasedExponent++) {
+		const uint32_t signAndExponent = (biasedExponent << 10) + 0x8000;
+		for (uint16_t mantissa = 0; mantissa < 0x0400; mantissa += 8) {
+			const psimd_u16 fp16 = {
+				(uint16_t) (mantissa + signAndExponent + 0),
+				(uint16_t) (mantissa + signAndExponent + 1),
+				(uint16_t) (mantissa + signAndExponent + 2),
+				(uint16_t) (mantissa + signAndExponent + 3),
+				(uint16_t) (mantissa + signAndExponent + 4),
+				(uint16_t) (mantissa + signAndExponent + 5),
+				(uint16_t) (mantissa + signAndExponent + 6),
+				(uint16_t) (mantissa + signAndExponent + 7)
+			};
+			const psimd_u32x2 fp32 = psimd_cast_f32x2_u32x2(fp16_alt_to_fp32x2_psimd(fp16));
+
+			EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[0]), fp32.lo[0])
+				<< std::hex << std::uppercase << std::setfill('0')
+				<< "F16 = 0x" << std::setw(4) << fp16[0] << ", "
+				<< "F32(F16) = 0x" << std::setw(8) << fp32.lo[0] << ", "
+				<< "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[0]);
+			EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[1]), fp32.lo[1])
+				<< std::hex << std::uppercase << std::setfill('0')
+				<< "F16 = 0x" << std::setw(4) << fp16[1] << ", "
+				<< "F32(F16) = 0x" << std::setw(8) << fp32.lo[1] << ", "
+				<< "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[1]);
+			EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[2]), fp32.lo[2])
+				<< std::hex << std::uppercase << std::setfill('0')
+				<< "F16 = 0x" << std::setw(4) << fp16[2] << ", "
+				<< "F32(F16) = 0x" << std::setw(8) << fp32.lo[2] << ", "
+				<< "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[2]);
+			EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[3]), fp32.lo[3])
+				<< std::hex << std::uppercase << std::setfill('0')
+				<< "F16 = 0x" << std::setw(4) << fp16[3] << ", "
+				<< "F32(F16) = 0x" << std::setw(8) << fp32.lo[3] << ", "
+				<< "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[3]);
+			EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[4]), fp32.hi[0])
+				<< std::hex << std::uppercase << std::setfill('0')
+				<< "F16 = 0x" << std::setw(4) << fp16[4] << ", "
+				<< "F32(F16) = 0x" << std::setw(8) << fp32.hi[0] << ", "
+				<< "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[4]);
+			EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[5]), fp32.hi[1])
+				<< std::hex << std::uppercase << std::setfill('0')
+				<< "F16 = 0x" << std::setw(4) << fp16[5] << ", "
+				<< "F32(F16) = 0x" << std::setw(8) << fp32.hi[1] << ", "
+				<< "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[5]);
+			EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[6]), fp32.hi[2])
+				<< std::hex << std::uppercase << std::setfill('0')
+				<< "F16 = 0x" << std::setw(4) << fp16[6] << ", "
+				<< "F32(F16) = 0x" << std::setw(8) << fp32.hi[2] << ", "
+				<< "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[6]);
+			EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[7]), fp32.hi[3])
+				<< std::hex << std::uppercase << std::setfill('0')
+				<< "F16 = 0x" << std::setw(4) << fp16[7] << ", "
+				<< "F32(F16) = 0x" << std::setw(8) << fp32.hi[3] << ", "
+				<< "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[7]);
+		}
+	}
+}
+
+TEST(FP16_ALT_TO_FP32x2_PSIMD, positive_denormalized_values) {
+	/* Exponent field zero: sweep all 10-bit mantissas, eight consecutive inputs per vector. */
+	for (uint16_t mantissa = 0; mantissa < 0x0400; mantissa += 8) {
+		const psimd_u16 fp16 = {
+			(uint16_t) (mantissa + 0),
+			(uint16_t) (mantissa + 1),
+			(uint16_t) (mantissa + 2),
+			(uint16_t) (mantissa + 3),
+			(uint16_t) (mantissa + 4),
+			(uint16_t) (mantissa + 5),
+			(uint16_t) (mantissa + 6),
+			(uint16_t) (mantissa + 7)
+		};
+		const psimd_u32x2 fp32 = psimd_cast_f32x2_u32x2(fp16_alt_to_fp32x2_psimd(fp16));
+
+		EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[0]), fp32.lo[0])
+			<< std::hex << std::uppercase << std::setfill('0')
+			<< "F16 = 0x" << std::setw(4) << fp16[0] << ", "
+			<< "F32(F16) = 0x" << std::setw(8) << fp32.lo[0] << ", "
+			<< "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[0]);
+		EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[1]), fp32.lo[1])
+			<< std::hex << std::uppercase << std::setfill('0')
+			<< "F16 = 0x" << std::setw(4) << fp16[1] << ", "
+			<< "F32(F16) = 0x" << std::setw(8) << fp32.lo[1] << ", "
+			<< "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[1]);
+		EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[2]), fp32.lo[2])
+			<< std::hex << std::uppercase << std::setfill('0')
+			<< "F16 = 0x" << std::setw(4) << fp16[2] << ", "
+			<< "F32(F16) = 0x" << std::setw(8) << fp32.lo[2] << ", "
+			<< "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[2]);
+		EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[3]), fp32.lo[3])
+			<< std::hex << std::uppercase << std::setfill('0')
+			<< "F16 = 0x" << std::setw(4) << fp16[3] << ", "
+			<< "F32(F16) = 0x" << std::setw(8) << fp32.lo[3] << ", "
+			<< "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[3]);
+		EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[4]), fp32.hi[0])
+			<< std::hex << std::uppercase << std::setfill('0')
+			<< "F16 = 0x" << std::setw(4) << fp16[4] << ", "
+			<< "F32(F16) = 0x" << std::setw(8) << fp32.hi[0] << ", "
+			<< "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[4]);
+		EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[5]), fp32.hi[1])
+			<< std::hex << std::uppercase << std::setfill('0')
+			<< "F16 = 0x" << std::setw(4) << fp16[5] << ", "
+			<< "F32(F16) = 0x" << std::setw(8) << fp32.hi[1] << ", "
+			<< "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[5]);
+		EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[6]), fp32.hi[2])
+			<< std::hex << std::uppercase << std::setfill('0')
+			<< "F16 = 0x" << std::setw(4) << fp16[6] << ", "
+			<< "F32(F16) = 0x" << std::setw(8) << fp32.hi[2] << ", "
+			<< "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[6]);
+		EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[7]), fp32.hi[3])
+			<< std::hex << std::uppercase << std::setfill('0')
+			<< "F16 = 0x" << std::setw(4) << fp16[7] << ", "
+			<< "F32(F16) = 0x" << std::setw(8) << fp32.hi[3] << ", "
+			<< "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[7]);
+	}
+}
+
+TEST(FP16_ALT_TO_FP32x2_PSIMD, negative_denormalized_values) {
+	/* Exponent field zero with the sign bit 0x8000 set: sweep all 10-bit mantissas, eight inputs per vector. */
+	for (uint16_t mantissa = 0; mantissa < 0x0400; mantissa += 8) {
+		const psimd_u16 fp16 = {
+			(uint16_t) (mantissa + 0x8000),
+			(uint16_t) (mantissa + 0x8001),
+			(uint16_t) (mantissa + 0x8002),
+			(uint16_t) (mantissa + 0x8003),
+			(uint16_t) (mantissa + 0x8004),
+			(uint16_t) (mantissa + 0x8005),
+			(uint16_t) (mantissa + 0x8006),
+			(uint16_t) (mantissa + 0x8007)
+		};
+		const psimd_u32x2 fp32 = psimd_cast_f32x2_u32x2(fp16_alt_to_fp32x2_psimd(fp16));
+
+		EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[0]), fp32.lo[0])
+			<< std::hex << std::uppercase << std::setfill('0')
+			<< "F16 = 0x" << std::setw(4) << fp16[0] << ", "
+			<< "F32(F16) = 0x" << std::setw(8) << fp32.lo[0] << ", "
+			<< "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[0]);
+		EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[1]), fp32.lo[1])
+			<< std::hex << std::uppercase << std::setfill('0')
+			<< "F16 = 0x" << std::setw(4) << fp16[1] << ", "
+			<< "F32(F16) = 0x" << std::setw(8) << fp32.lo[1] << ", "
+			<< "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[1]);
+		EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[2]), fp32.lo[2])
+			<< std::hex << std::uppercase << std::setfill('0')
+			<< "F16 = 0x" << std::setw(4) << fp16[2] << ", "
+			<< "F32(F16) = 0x" << std::setw(8) << fp32.lo[2] << ", "
+			<< "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[2]);
+		EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[3]), fp32.lo[3])
+			<< std::hex << std::uppercase << std::setfill('0')
+			<< "F16 = 0x" << std::setw(4) << fp16[3] << ", "
+			<< "F32(F16) = 0x" << std::setw(8) << fp32.lo[3] << ", "
+			<< "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[3]);
+		EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[4]), fp32.hi[0])
+			<< std::hex << std::uppercase << std::setfill('0')
+			<< "F16 = 0x" << std::setw(4) << fp16[4] << ", "
+			<< "F32(F16) = 0x" << std::setw(8) << fp32.hi[0] << ", "
+			<< "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[4]);
+		EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[5]), fp32.hi[1])
+			<< std::hex << std::uppercase << std::setfill('0')
+			<< "F16 = 0x" << std::setw(4) << fp16[5] << ", "
+			<< "F32(F16) = 0x" << std::setw(8) << fp32.hi[1] << ", "
+			<< "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[5]);
+		EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[6]), fp32.hi[2])
+			<< std::hex << std::uppercase << std::setfill('0')
+			<< "F16 = 0x" << std::setw(4) << fp16[6] << ", "
+			<< "F32(F16) = 0x" << std::setw(8) << fp32.hi[2] << ", "
+			<< "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[6]);
+		EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[7]), fp32.hi[3])
+			<< std::hex << std::uppercase << std::setfill('0')
+			<< "F16 = 0x" << std::setw(4) << fp16[7] << ", "
+			<< "F32(F16) = 0x" << std::setw(8) << fp32.hi[3] << ", "
+			<< "F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[7]);
+	}
+}
diff --git a/test/bitcasts.cc b/test/bitcasts.cc
new file mode 100644
index 0000000..f7be29d
--- /dev/null
+++ b/test/bitcasts.cc
@@ -0,0 +1,56 @@
+#include <gtest/gtest.h>
+
+#include <cstdint>
+
+#include <fp16.h>
+
+
+TEST(FP32_TO_BITS, positive) {
+	for (uint32_t bits = UINT32_C(0x00000000); bits < UINT32_C(0x80000000); ++bits) { /* every pattern with the sign bit clear */
+		float value; /* float whose storage is filled with `bits` */
+		memcpy(&value, &bits, sizeof(float));
+
+		ASSERT_EQ(bits, fp32_to_bits(value))
+			<< std::hex << std::uppercase << std::setfill('0')
+			<< "BITS = 0x" << std::setw(8) << bits << ", "
+			<< "BITCAST(VALUE) = 0x" << std::setw(8) << fp32_to_bits(value);
+	}
+}
+
+TEST(FP32_TO_BITS, negative) {
+	for (uint32_t bits = UINT32_C(0xFFFFFFFF); bits > UINT32_C(0x7FFFFFFF); --bits) { /* every pattern with the sign bit set, descending */
+		float value; /* float whose storage is filled with `bits` */
+		memcpy(&value, &bits, sizeof(float));
+
+		ASSERT_EQ(bits, fp32_to_bits(value))
+			<< std::hex << std::uppercase << std::setfill('0')
+			<< "BITS = 0x" << std::setw(8) << bits << ", "
+			<< "BITCAST(VALUE) = 0x" << std::setw(8) << fp32_to_bits(value);
+	}
+}
+
+TEST(FP32_FROM_BITS, positive) {
+	for (uint32_t bits = UINT32_C(0x00000000); bits < UINT32_C(0x80000000); ++bits) { /* every pattern with the sign bit clear */
+		const float converted = fp32_from_bits(bits);
+		uint32_t bitcast; /* storage of `converted`, read back via memcpy */
+		memcpy(&bitcast, &converted, sizeof(uint32_t));
+
+		ASSERT_EQ(bits, bitcast)
+			<< std::hex << std::uppercase << std::setfill('0')
+			<< "BITS = 0x" << std::setw(8) << bits << ", "
+			<< "VALUE = 0x" << std::setw(8) << bitcast;
+	}
+}
+
+TEST(FP32_FROM_BITS, negative) {
+	for (uint32_t bits = UINT32_C(0xFFFFFFFF); bits > UINT32_C(0x7FFFFFFF); --bits) { /* every pattern with the sign bit set, descending */
+		const float converted = fp32_from_bits(bits);
+		uint32_t bitcast; /* storage of `converted`, read back via memcpy */
+		memcpy(&bitcast, &converted, sizeof(uint32_t));
+
+		ASSERT_EQ(bits, bitcast)
+			<< std::hex << std::uppercase << std::setfill('0')
+			<< "BITS = 0x" << std::setw(8) << bits << ", "
+			<< "VALUE = 0x" << std::setw(8) << bitcast;
+	}
+}
diff --git a/test/ieee-from-fp32-value.cc b/test/ieee-from-fp32-value.cc
new file mode 100644
index 0000000..24e3e7e
--- /dev/null
+++ b/test/ieee-from-fp32-value.cc
@@ -0,0 +1,523 @@
+#include <gtest/gtest.h>
+
+#include <cstdint>
+
+#include <fp16.h>
+#include "tables.h"
+
+#if (defined(__i386__) || defined(__x86_64__)) && defined(__F16C__)
+	#include <x86intrin.h>
+#endif
+
+
+TEST(FP16_IEEE_FROM_FP32_VALUE, normalized_powers_of_2) {
+	const uint32_t min_po2_f32   = UINT32_C(0x38800000); /* 2**-14 */
+	const uint32_t eighths_f32   = UINT32_C(0x3E000000); /* 0.125 */
+	const uint32_t quarter_f32   = UINT32_C(0x3E800000); /* 0.25 */
+	const uint32_t half_f32      = UINT32_C(0x3F000000); /* 0.5 */
+	const uint32_t one_f32       = UINT32_C(0x3F800000); /* 1 */
+	const uint32_t two_f32       = UINT32_C(0x40000000); /* 2 */
+	const uint32_t four_f32      = UINT32_C(0x40800000); /* 4 */
+	const uint32_t eight_f32     = UINT32_C(0x41000000); /* 8 */
+	const uint32_t sixteen_f32   = UINT32_C(0x41800000); /* 16 */
+	const uint32_t thirtytwo_f32 = UINT32_C(0x42000000); /* 32 */
+	const uint32_t sixtyfour_f32 = UINT32_C(0x42800000); /* 64 */
+	const uint32_t max_po2_f32   = UINT32_C(0x47000000); /* 2**15 */
+
+	const uint16_t min_po2_f16   = UINT16_C(0x0400);
+	const uint16_t eighths_f16   = UINT16_C(0x3000);
+	const uint16_t quarter_f16   = UINT16_C(0x3400);
+	const uint16_t half_f16      = UINT16_C(0x3800);
+	const uint16_t one_f16       = UINT16_C(0x3C00);
+	const uint16_t two_f16       = UINT16_C(0x4000);
+	const uint16_t four_f16      = UINT16_C(0x4400);
+	const uint16_t eight_f16     = UINT16_C(0x4800);
+	const uint16_t sixteen_f16   = UINT16_C(0x4C00);
+	const uint16_t thirtytwo_f16 = UINT16_C(0x5000);
+	const uint16_t sixtyfour_f16 = UINT16_C(0x5400);
+	const uint16_t max_po2_f16   = UINT16_C(0x7800);
+
+	float min_po2_value;
+	memcpy(&min_po2_value, &min_po2_f32, sizeof(float));
+	EXPECT_EQ(min_po2_f16, fp16_ieee_from_fp32_value(min_po2_value))
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F32 = 0x" << std::setw(8) << min_po2_f32 << ", "
+		<< "F16(F32) = 0x" << std::setw(4) << fp16_ieee_from_fp32_value(min_po2_value) << ", "
+		<< "F16 = 0x" << std::setw(4) << min_po2_f16;
+
+	float eighths_value;
+	memcpy(&eighths_value, &eighths_f32, sizeof(float));
+	EXPECT_EQ(eighths_f16, fp16_ieee_from_fp32_value(eighths_value))
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F32 = 0x" << std::setw(8) << eighths_f32 << ", "
+		<< "F16(F32) = 0x" << std::setw(4) << fp16_ieee_from_fp32_value(eighths_value) << ", "
+		<< "F16 = 0x" << std::setw(4) << eighths_f16;
+
+	float quarter_value;
+	memcpy(&quarter_value, &quarter_f32, sizeof(float));
+	EXPECT_EQ(quarter_f16, fp16_ieee_from_fp32_value(quarter_value))
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F32 = 0x" << std::setw(8) << quarter_f32 << ", "
+		<< "F16(F32) = 0x" << std::setw(4) << fp16_ieee_from_fp32_value(quarter_value) << ", "
+		<< "F16 = 0x" << std::setw(4) << quarter_f16;
+
+	float half_value;
+	memcpy(&half_value, &half_f32, sizeof(float));
+	EXPECT_EQ(half_f16, fp16_ieee_from_fp32_value(half_value))
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F32 = 0x" << std::setw(8) << half_f32 << ", "
+		<< "F16(F32) = 0x" << std::setw(4) << fp16_ieee_from_fp32_value(half_value) << ", "
+		<< "F16 = 0x" << std::setw(4) << half_f16;
+
+	float one_value;
+	memcpy(&one_value, &one_f32, sizeof(float));
+	EXPECT_EQ(one_f16, fp16_ieee_from_fp32_value(one_value))
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F32 = 0x" << std::setw(8) << one_f32 << ", "
+		<< "F16(F32) = 0x" << std::setw(4) << fp16_ieee_from_fp32_value(one_value) << ", "
+		<< "F16 = 0x" << std::setw(4) << one_f16;
+
+	float two_value;
+	memcpy(&two_value, &two_f32, sizeof(float));
+	EXPECT_EQ(two_f16, fp16_ieee_from_fp32_value(two_value))
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F32 = 0x" << std::setw(8) << two_f32 << ", "
+		<< "F16(F32) = 0x" << std::setw(4) << fp16_ieee_from_fp32_value(two_value) << ", "
+		<< "F16 = 0x" << std::setw(4) << two_f16;
+
+	float four_value;
+	memcpy(&four_value, &four_f32, sizeof(float));
+	EXPECT_EQ(four_f16, fp16_ieee_from_fp32_value(four_value))
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F32 = 0x" << std::setw(8) << four_f32 << ", "
+		<< "F16(F32) = 0x" << std::setw(4) << fp16_ieee_from_fp32_value(four_value) << ", "
+		<< "F16 = 0x" << std::setw(4) << four_f16;
+
+	float eight_value;
+	memcpy(&eight_value, &eight_f32, sizeof(float));
+	EXPECT_EQ(eight_f16, fp16_ieee_from_fp32_value(eight_value))
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F32 = 0x" << std::setw(8) << eight_f32 << ", "
+		<< "F16(F32) = 0x" << std::setw(4) << fp16_ieee_from_fp32_value(eight_value) << ", "
+		<< "F16 = 0x" << std::setw(4) << eight_f16;
+
+	float sixteen_value;
+	memcpy(&sixteen_value, &sixteen_f32, sizeof(float));
+	EXPECT_EQ(sixteen_f16, fp16_ieee_from_fp32_value(sixteen_value))
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F32 = 0x" << std::setw(8) << sixteen_f32 << ", "
+		<< "F16(F32) = 0x" << std::setw(4) << fp16_ieee_from_fp32_value(sixteen_value) << ", "
+		<< "F16 = 0x" << std::setw(4) << sixteen_f16;
+
+	float thirtytwo_value;
+	memcpy(&thirtytwo_value, &thirtytwo_f32, sizeof(float));
+	EXPECT_EQ(thirtytwo_f16, fp16_ieee_from_fp32_value(thirtytwo_value))
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F32 = 0x" << std::setw(8) << thirtytwo_f32 << ", "
+		<< "F16(F32) = 0x" << std::setw(4) << fp16_ieee_from_fp32_value(thirtytwo_value) << ", "
+		<< "F16 = 0x" << std::setw(4) << thirtytwo_f16;
+
+	float sixtyfour_value;
+	memcpy(&sixtyfour_value, &sixtyfour_f32, sizeof(float));
+	EXPECT_EQ(sixtyfour_f16, fp16_ieee_from_fp32_value(sixtyfour_value))
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F32 = 0x" << std::setw(8) << sixtyfour_f32 << ", "
+		<< "F16(F32) = 0x" << std::setw(4) << fp16_ieee_from_fp32_value(sixtyfour_value) << ", "
+		<< "F16 = 0x" << std::setw(4) << sixtyfour_f16;
+
+	float max_po2_value;
+	memcpy(&max_po2_value, &max_po2_f32, sizeof(float));
+	EXPECT_EQ(max_po2_f16, fp16_ieee_from_fp32_value(max_po2_value))
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F32 = 0x" << std::setw(8) << max_po2_f32 << ", "
+		<< "F16(F32) = 0x" << std::setw(4) << fp16_ieee_from_fp32_value(max_po2_value) << ", "
+		<< "F16 = 0x" << std::setw(4) << max_po2_f16;
+}
+
+TEST(FP16_IEEE_FROM_FP32_VALUE, denormalized_powers_of_2) {
+	const uint32_t exp2_minus_15_f32 = UINT32_C(0x38000000);
+	const uint32_t exp2_minus_16_f32 = UINT32_C(0x37800000);
+	const uint32_t exp2_minus_17_f32 = UINT32_C(0x37000000);
+	const uint32_t exp2_minus_18_f32 = UINT32_C(0x36800000);
+	const uint32_t exp2_minus_19_f32 = UINT32_C(0x36000000);
+	const uint32_t exp2_minus_20_f32 = UINT32_C(0x35800000);
+	const uint32_t exp2_minus_21_f32 = UINT32_C(0x35000000);
+	const uint32_t exp2_minus_22_f32 = UINT32_C(0x34800000);
+	const uint32_t exp2_minus_23_f32 = UINT32_C(0x34000000);
+	const uint32_t exp2_minus_24_f32 = UINT32_C(0x33800000);
+	const uint32_t exp2_minus_25_f32 = UINT32_C(0x33000000);
+
+	const uint16_t exp2_minus_15_f16 = UINT16_C(0x0200);
+	const uint16_t exp2_minus_16_f16 = UINT16_C(0x0100);
+	const uint16_t exp2_minus_17_f16 = UINT16_C(0x0080);
+	const uint16_t exp2_minus_18_f16 = UINT16_C(0x0040);
+	const uint16_t exp2_minus_19_f16 = UINT16_C(0x0020);
+	const uint16_t exp2_minus_20_f16 = UINT16_C(0x0010);
+	const uint16_t exp2_minus_21_f16 = UINT16_C(0x0008);
+	const uint16_t exp2_minus_22_f16 = UINT16_C(0x0004);
+	const uint16_t exp2_minus_23_f16 = UINT16_C(0x0002);
+	const uint16_t exp2_minus_24_f16 = UINT16_C(0x0001);
+	const uint16_t exp2_minus_25_f16 = UINT16_C(0x0000); /* 2**-25 is expected to convert to zero */
+
+	float exp2_minus_15_value;
+	memcpy(&exp2_minus_15_value, &exp2_minus_15_f32, sizeof(float));
+	EXPECT_EQ(exp2_minus_15_f16, fp16_ieee_from_fp32_value(exp2_minus_15_value))
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F32 = 0x" << std::setw(8) << exp2_minus_15_f32 << ", "
+		<< "F16(F32) = 0x" << std::setw(4) << fp16_ieee_from_fp32_value(exp2_minus_15_value) << ", "
+		<< "F16 = 0x" << std::setw(4) << exp2_minus_15_f16;
+
+	float exp2_minus_16_value;
+	memcpy(&exp2_minus_16_value, &exp2_minus_16_f32, sizeof(float));
+	EXPECT_EQ(exp2_minus_16_f16, fp16_ieee_from_fp32_value(exp2_minus_16_value))
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F32 = 0x" << std::setw(8) << exp2_minus_16_f32 << ", "
+		<< "F16(F32) = 0x" << std::setw(4) << fp16_ieee_from_fp32_value(exp2_minus_16_value) << ", "
+		<< "F16 = 0x" << std::setw(4) << exp2_minus_16_f16;
+
+	float exp2_minus_17_value;
+	memcpy(&exp2_minus_17_value, &exp2_minus_17_f32, sizeof(float));
+	EXPECT_EQ(exp2_minus_17_f16, fp16_ieee_from_fp32_value(exp2_minus_17_value))
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F32 = 0x" << std::setw(8) << exp2_minus_17_f32 << ", "
+		<< "F16(F32) = 0x" << std::setw(4) << fp16_ieee_from_fp32_value(exp2_minus_17_value) << ", "
+		<< "F16 = 0x" << std::setw(4) << exp2_minus_17_f16;
+
+	float exp2_minus_18_value;
+	memcpy(&exp2_minus_18_value, &exp2_minus_18_f32, sizeof(float));
+	EXPECT_EQ(exp2_minus_18_f16, fp16_ieee_from_fp32_value(exp2_minus_18_value))
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F32 = 0x" << std::setw(8) << exp2_minus_18_f32 << ", "
+		<< "F16(F32) = 0x" << std::setw(4) << fp16_ieee_from_fp32_value(exp2_minus_18_value) << ", "
+		<< "F16 = 0x" << std::setw(4) << exp2_minus_18_f16;
+
+	float exp2_minus_19_value;
+	memcpy(&exp2_minus_19_value, &exp2_minus_19_f32, sizeof(float));
+	EXPECT_EQ(exp2_minus_19_f16, fp16_ieee_from_fp32_value(exp2_minus_19_value))
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F32 = 0x" << std::setw(8) << exp2_minus_19_f32 << ", "
+		<< "F16(F32) = 0x" << std::setw(4) << fp16_ieee_from_fp32_value(exp2_minus_19_value) << ", "
+		<< "F16 = 0x" << std::setw(4) << exp2_minus_19_f16;
+
+	float exp2_minus_20_value;
+	memcpy(&exp2_minus_20_value, &exp2_minus_20_f32, sizeof(float));
+	EXPECT_EQ(exp2_minus_20_f16, fp16_ieee_from_fp32_value(exp2_minus_20_value))
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F32 = 0x" << std::setw(8) << exp2_minus_20_f32 << ", "
+		<< "F16(F32) = 0x" << std::setw(4) << fp16_ieee_from_fp32_value(exp2_minus_20_value) << ", "
+		<< "F16 = 0x" << std::setw(4) << exp2_minus_20_f16;
+
+	float exp2_minus_21_value;
+	memcpy(&exp2_minus_21_value, &exp2_minus_21_f32, sizeof(float));
+	EXPECT_EQ(exp2_minus_21_f16, fp16_ieee_from_fp32_value(exp2_minus_21_value))
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F32 = 0x" << std::setw(8) << exp2_minus_21_f32 << ", "
+		<< "F16(F32) = 0x" << std::setw(4) << fp16_ieee_from_fp32_value(exp2_minus_21_value) << ", "
+		<< "F16 = 0x" << std::setw(4) << exp2_minus_21_f16;
+
+	float exp2_minus_22_value;
+	memcpy(&exp2_minus_22_value, &exp2_minus_22_f32, sizeof(float));
+	EXPECT_EQ(exp2_minus_22_f16, fp16_ieee_from_fp32_value(exp2_minus_22_value))
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F32 = 0x" << std::setw(8) << exp2_minus_22_f32 << ", "
+		<< "F16(F32) = 0x" << std::setw(4) << fp16_ieee_from_fp32_value(exp2_minus_22_value) << ", "
+		<< "F16 = 0x" << std::setw(4) << exp2_minus_22_f16;
+
+	float exp2_minus_23_value;
+	memcpy(&exp2_minus_23_value, &exp2_minus_23_f32, sizeof(float));
+	EXPECT_EQ(exp2_minus_23_f16, fp16_ieee_from_fp32_value(exp2_minus_23_value))
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F32 = 0x" << std::setw(8) << exp2_minus_23_f32 << ", "
+		<< "F16(F32) = 0x" << std::setw(4) << fp16_ieee_from_fp32_value(exp2_minus_23_value) << ", "
+		<< "F16 = 0x" << std::setw(4) << exp2_minus_23_f16;
+
+	float exp2_minus_24_value;
+	memcpy(&exp2_minus_24_value, &exp2_minus_24_f32, sizeof(float));
+	EXPECT_EQ(exp2_minus_24_f16, fp16_ieee_from_fp32_value(exp2_minus_24_value))
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F32 = 0x" << std::setw(8) << exp2_minus_24_f32 << ", "
+		<< "F16(F32) = 0x" << std::setw(4) << fp16_ieee_from_fp32_value(exp2_minus_24_value) << ", "
+		<< "F16 = 0x" << std::setw(4) << exp2_minus_24_f16;
+
+	float exp2_minus_25_value;
+	memcpy(&exp2_minus_25_value, &exp2_minus_25_f32, sizeof(float));
+	EXPECT_EQ(exp2_minus_25_f16, fp16_ieee_from_fp32_value(exp2_minus_25_value))
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F32 = 0x" << std::setw(8) << exp2_minus_25_f32 << ", "
+		<< "F16(F32) = 0x" << std::setw(4) << fp16_ieee_from_fp32_value(exp2_minus_25_value) << ", "
+		<< "F16 = 0x" << std::setw(4) << exp2_minus_25_f16;
+}
+
+TEST(FP16_IEEE_FROM_FP32_VALUE, zero) {
+	const uint32_t positive_zero_f32 = UINT32_C(0x00000000);
+	const uint32_t negative_zero_f32 = UINT32_C(0x80000000); /* only the sign bit set */
+
+	const uint16_t positive_zero_f16 = UINT16_C(0x0000);
+	const uint16_t negative_zero_f16 = UINT16_C(0x8000); /* only the sign bit set */
+
+	float positive_zero_value;
+	memcpy(&positive_zero_value, &positive_zero_f32, sizeof(float));
+	EXPECT_EQ(positive_zero_f16, fp16_ieee_from_fp32_value(positive_zero_value))
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F32 = 0x" << std::setw(8) << positive_zero_f32 << ", "
+		<< "F16(F32) = 0x" << std::setw(4) << fp16_ieee_from_fp32_value(positive_zero_value) << ", "
+		<< "F16 = 0x" << std::setw(4) << positive_zero_f16;
+
+	float negative_zero_value;
+	memcpy(&negative_zero_value, &negative_zero_f32, sizeof(float));
+	EXPECT_EQ(negative_zero_f16, fp16_ieee_from_fp32_value(negative_zero_value))
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F32 = 0x" << std::setw(8) << negative_zero_f32 << ", "
+		<< "F16(F32) = 0x" << std::setw(4) << fp16_ieee_from_fp32_value(negative_zero_value) << ", "
+		<< "F16 = 0x" << std::setw(4) << negative_zero_f16;
+}
+
+TEST(FP16_IEEE_FROM_FP32_VALUE, infinity) {
+	const uint32_t positive_infinity_f32 = UINT32_C(0x7F800000); /* exponent field all ones, mantissa zero */
+	const uint32_t negative_infinity_f32 = UINT32_C(0xFF800000);
+
+	const uint16_t positive_infinity_f16 = UINT16_C(0x7C00); /* exponent field all ones, mantissa zero */
+	const uint16_t negative_infinity_f16 = UINT16_C(0xFC00);
+
+	float positive_infinity_value;
+	memcpy(&positive_infinity_value, &positive_infinity_f32, sizeof(float));
+	EXPECT_EQ(positive_infinity_f16, fp16_ieee_from_fp32_value(positive_infinity_value))
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F32 = 0x" << std::setw(8) << positive_infinity_f32 << ", "
+		<< "F16(F32) = 0x" << std::setw(4) << fp16_ieee_from_fp32_value(positive_infinity_value) << ", "
+		<< "F16 = 0x" << std::setw(4) << positive_infinity_f16;
+
+	float negative_infinity_value;
+	memcpy(&negative_infinity_value, &negative_infinity_f32, sizeof(float));
+	EXPECT_EQ(negative_infinity_f16, fp16_ieee_from_fp32_value(negative_infinity_value))
+		<< std::hex << std::uppercase << std::setfill('0')
+		<< "F32 = 0x" << std::setw(8) << negative_infinity_f32 << ", "
+		<< "F16(F32) = 0x" << std::setw(4) << fp16_ieee_from_fp32_value(negative_infinity_value) << ", "
+		<< "F16 = 0x" << std::setw(4) << negative_infinity_f16;
+}
+
+TEST(FP16_IEEE_FROM_FP32_VALUE, positive_nan) {
+	for (uint32_t nan_bits = UINT32_C(0x7FFFFFFF); nan_bits >= UINT32_C(0x7F800001); --nan_bits) {
+		float nan_input;
+		memcpy(&nan_input, &nan_bits, sizeof(float));
+		const uint16_t nan_f16 = fp16_ieee_from_fp32_value(nan_input);
+
+		/* Sign bit must stay clear */
+		ASSERT_EQ(nan_f16 & UINT16_C(0x8000), 0)
+			<< std::hex << std::uppercase << std::setfill('0')
+			<< "F32 = 0x" << std::setw(8) << nan_bits << ", "
+			<< "F16(F32) = 0x" << std::setw(4) << nan_f16;
+
+		/* Exponent field must be all ones */
+		ASSERT_EQ(nan_f16 & UINT16_C(0x7C00), UINT16_C(0x7C00))
+			<< std::hex << std::uppercase << std::setfill('0')
+			<< "F32 = 0x" << std::setw(8) << nan_bits << ", "
+			<< "F16(F32) = 0x" << std::setw(4) << nan_f16;
+
+		/* Mantissa must be non-zero, otherwise the result would encode infinity */
+		ASSERT_NE(nan_f16 & UINT16_C(0x03FF), 0)
+			<< std::hex << std::uppercase << std::setfill('0')
+			<< "F32 = 0x" << std::setw(8) << nan_bits << ", "
+			<< "F16(F32) = 0x" << std::setw(4) << nan_f16;
+	}
+}
+
+TEST(FP16_IEEE_FROM_FP32_VALUE, negative_nan) {
+	for (uint32_t nan_bits = UINT32_C(0xFFFFFFFF); nan_bits >= UINT32_C(0xFF800001); --nan_bits) {
+		float nan_input;
+		memcpy(&nan_input, &nan_bits, sizeof(float));
+		const uint16_t nan_f16 = fp16_ieee_from_fp32_value(nan_input);
+
+		/* Sign bit must stay set */
+		ASSERT_EQ(nan_f16 & UINT16_C(0x8000), UINT16_C(0x8000))
+			<< std::hex << std::uppercase << std::setfill('0')
+			<< "F32 = 0x" << std::setw(8) << nan_bits << ", "
+			<< "F16(F32) = 0x" << std::setw(4) << nan_f16;
+
+		/* Exponent field must be all ones */
+		ASSERT_EQ(nan_f16 & UINT16_C(0x7C00), UINT16_C(0x7C00))
+			<< std::hex << std::uppercase << std::setfill('0')
+			<< "F32 = 0x" << std::setw(8) << nan_bits << ", "
+			<< "F16(F32) = 0x" << std::setw(4) << nan_f16;
+
+		/* Mantissa must be non-zero, otherwise the result would encode infinity */
+		ASSERT_NE(nan_f16 & UINT16_C(0x03FF), 0)
+			<< std::hex << std::uppercase << std::setfill('0')
+			<< "F32 = 0x" << std::setw(8) << nan_bits << ", "
+			<< "F16(F32) = 0x" << std::setw(4) << nan_f16;
+	}
+}
+
+TEST(FP16_IEEE_FROM_FP32_VALUE, revertible) {
+	/* Sign bit clear: every encoding below 0x7C00 must survive F16 -> F32 -> F16 */
+	for (uint16_t f16 = UINT16_C(0x0000); f16 <= UINT16_C(0x7BFF); ++f16) {
+		const float value_f32 = fp16_ieee_to_fp32_value(f16);
+		uint32_t widened_bits; /* bit pattern of the intermediate float, for diagnostics only */
+		memcpy(&widened_bits, &value_f32, sizeof(uint32_t));
+
+		ASSERT_EQ(f16, fp16_ieee_from_fp32_value(value_f32))
+			<< std::hex << std::uppercase << std::setfill('0')
+			<< "F16 = 0x" << std::setw(4) << f16 << ", "
+			<< "F32(F16) = 0x" << std::setw(8) << widened_bits << ", "
+			<< "F16(F32(F16)) = 0x" << std::setw(4) << fp16_ieee_from_fp32_value(value_f32);
+	}
+
+	/* Sign bit set: every encoding from 0x8000 up to, but excluding, 0xFC00 */
+	for (uint16_t f16 = UINT16_C(0x8000); f16 <= UINT16_C(0xFBFF); ++f16) {
+		const float value_f32 = fp16_ieee_to_fp32_value(f16);
+		uint32_t widened_bits; /* bit pattern of the intermediate float, for diagnostics only */
+		memcpy(&widened_bits, &value_f32, sizeof(uint32_t));
+
+		ASSERT_EQ(f16, fp16_ieee_from_fp32_value(value_f32))
+			<< std::hex << std::uppercase << std::setfill('0')
+			<< "F16 = 0x" << std::setw(4) << f16 << ", "
+			<< "F32(F16) = 0x" << std::setw(8) << widened_bits << ", "
+			<< "F16(F32(F16)) = 0x" << std::setw(4) << fp16_ieee_from_fp32_value(value_f32);
+	}
+}
+
+TEST(FP16_IEEE_FROM_FP32_VALUE, underflow) {
+	/* Every positive pattern below 0x33000001 must convert to +0; 0x33000001 itself to 0x0001 */
+	const uint32_t first_nonzero_bits = UINT32_C(0x33000001);
+	const uint16_t expected_zero = UINT16_C(0x0000), expected_min = UINT16_C(0x0001);
+	for (uint32_t pattern = UINT32_C(0x00000001); pattern < first_nonzero_bits; pattern++) {
+		float input;
+		memcpy(&input, &pattern, sizeof(input));
+		ASSERT_EQ(expected_zero, fp16_ieee_from_fp32_value(input)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F32 = 0x" << std::setw(8) << pattern << ", " <<
+			"F16(F32) = 0x" << std::setw(4) << fp16_ieee_from_fp32_value(input) << ", " <<
+			"F16 = 0x" << std::setw(4) << expected_zero;
+	}
+	float boundary;
+	memcpy(&boundary, &first_nonzero_bits, sizeof(boundary));
+	ASSERT_EQ(expected_min, fp16_ieee_from_fp32_value(boundary)) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F32 = 0x" << std::setw(8) << first_nonzero_bits << ", " <<
+		"F16(F32) = 0x" << std::setw(4) << fp16_ieee_from_fp32_value(boundary) << ", " <<
+		"F16 = 0x" << std::setw(4) << expected_min;
+}
+
+TEST(FP16_IEEE_FROM_FP32_VALUE, overflow) {
+	/* Patterns from +infinity (0x7F800000) down to 0x477FF000 must convert to half infinity; */
+	/* 0x477FEFFF, one pattern lower, must still convert to the largest finite half 0x7BFF. */
+	const uint32_t largest_finite_bits = UINT32_C(0x477FEFFF), infinity_bits = UINT32_C(0x7F800000);
+	const uint16_t largest_finite_half = UINT16_C(0x7BFF), infinity_half = UINT16_C(0x7C00);
+	for (uint32_t pattern = infinity_bits; pattern > largest_finite_bits; pattern--) {
+		float input;
+		memcpy(&input, &pattern, sizeof(input));
+		ASSERT_EQ(infinity_half, fp16_ieee_from_fp32_value(input)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F32 = 0x" << std::setw(8) << pattern << ", " <<
+			"F16(F32) = 0x" << std::setw(4) << fp16_ieee_from_fp32_value(input) << ", " <<
+			"F16 = 0x" << std::setw(4) << infinity_half;
+	}
+	float boundary;
+	memcpy(&boundary, &largest_finite_bits, sizeof(boundary));
+	ASSERT_EQ(largest_finite_half, fp16_ieee_from_fp32_value(boundary)) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F32 = 0x" << std::setw(8) << largest_finite_bits << ", " <<
+		"F16(F32) = 0x" << std::setw(4) << fp16_ieee_from_fp32_value(boundary) << ", " <<
+		"F16 = 0x" << std::setw(4) << largest_finite_half;
+}
+
+TEST(FP16_IEEE_FROM_FP32_VALUE, positive_denormalized_values) {
+	/* For each half value h in 0x0000..0x03FF, every F32 pattern from the running lower bound */
+	/* up to (but excluding) fp16::denormalizedRanges[h] must convert to h. */
+	uint32_t lower = UINT32_C(0x33000001);
+	for (uint16_t half = 0; half < UINT16_C(0x0400); half++) {
+		const uint32_t upper = fp16::denormalizedRanges[half];
+		for (uint32_t bits = lower; bits < upper; bits++) {
+			float input;
+			memcpy(&input, &bits, sizeof(input));
+			ASSERT_EQ(half, fp16_ieee_from_fp32_value(input)) <<
+				std::hex << std::uppercase << std::setfill('0') <<
+				"F32 = 0x" << std::setw(8) << bits << ", " <<
+				"F16(F32) = 0x" << std::setw(4) << fp16_ieee_from_fp32_value(input) << ", " <<
+				"F16 = 0x" << std::setw(4) << half;
+		}
+		lower = upper;
+	}
+}
+
+TEST(FP16_IEEE_FROM_FP32_VALUE, negative_denormalized_values) {
+	/* Halves 0x8000..0x83FF: the F32 bounds are the tabulated ranges with the sign bit set */
+	const uint32_t sign_f32 = UINT32_C(0x80000000);
+	uint32_t lower = UINT32_C(0x33000001) | sign_f32;
+	for (uint16_t half = UINT16_C(0x8000); half < UINT16_C(0x8400); half++) {
+		const uint32_t upper = fp16::denormalizedRanges[half & UINT16_C(0x7FFF)] | sign_f32;
+		for (uint32_t bits = lower; bits < upper; bits++) {
+			float input;
+			memcpy(&input, &bits, sizeof(input));
+			ASSERT_EQ(half, fp16_ieee_from_fp32_value(input)) <<
+				std::hex << std::uppercase << std::setfill('0') <<
+				"F32 = 0x" << std::setw(8) << bits << ", " <<
+				"F16(F32) = 0x" << std::setw(4) << fp16_ieee_from_fp32_value(input) << ", " <<
+				"F16 = 0x" << std::setw(4) << half;
+		}
+		lower = upper;
+	}
+}
+
+TEST(FP16_IEEE_FROM_FP32_VALUE, positive_normalized_values) {
+	/* 0x3F7FF000 is the minimum number that rounds to 1.0h; offset by the exponent it is the */
+	/* first pattern tested per exponent, and fp16::normalizedRanges supplies the upper bounds */
+	const uint32_t one_lower_bound = UINT32_C(0x3F7FF000), half_bias = 15;
+	for (int32_t exponent = -14; exponent <= 15; exponent++) {
+		const uint32_t binade_offset = uint32_t(exponent) << 23;
+		uint32_t lower = one_lower_bound + binade_offset;
+		for (uint16_t half = uint16_t(exponent + half_bias) << 10; half < uint16_t(exponent + half_bias + 1) << 10; half++) {
+			const uint32_t upper = fp16::normalizedRanges[half & UINT16_C(0x3FF)] + binade_offset;
+			for (uint32_t bits = lower; bits < upper; bits++) {
+				float input;
+				memcpy(&input, &bits, sizeof(input));
+				ASSERT_EQ(half, fp16_ieee_from_fp32_value(input)) <<
+					std::hex << std::uppercase << std::setfill('0') <<
+					"F32 = 0x" << std::setw(8) << bits << ", " <<
+					"F16(F32) = 0x" << std::setw(4) << fp16_ieee_from_fp32_value(input) << ", " <<
+					"F16 = 0x" << std::setw(4) << half;
+			}
+			lower = upper;
+		}
+	}
+}
+
+TEST(FP16_IEEE_FROM_FP32_VALUE, negative_normalized_values) {
+	/* 0x3F7FF000 is the minimum number that rounds to 1.0h; here it and the bounds from */
+	/* fp16::normalizedRanges get the sign bit set before the exponent offset is added */
+	const uint32_t one_lower_bound = UINT32_C(0x3F7FF000), half_bias = 15, sign_f32 = UINT32_C(0x80000000);
+	for (int32_t exponent = -14; exponent <= 15; exponent++) {
+		const uint32_t binade_offset = uint32_t(exponent) << 23;
+		uint32_t lower = (one_lower_bound | sign_f32) + binade_offset;
+		for (uint16_t half = (UINT16_C(0x8000) | (uint16_t(exponent + half_bias) << 10)); half < (UINT16_C(0x8000) | (uint16_t(exponent + half_bias + 1) << 10)); half++) {
+			const uint32_t upper = (fp16::normalizedRanges[half & UINT16_C(0x3FF)] | sign_f32) + binade_offset;
+			for (uint32_t bits = lower; bits < upper; bits++) {
+				float input;
+				memcpy(&input, &bits, sizeof(input));
+				ASSERT_EQ(half, fp16_ieee_from_fp32_value(input)) <<
+					std::hex << std::uppercase << std::setfill('0') <<
+					"F32 = 0x" << std::setw(8) << bits << ", " <<
+					"F16(F32) = 0x" << std::setw(4) << fp16_ieee_from_fp32_value(input) << ", " <<
+					"F16 = 0x" << std::setw(4) << half;
+			}
+			lower = upper;
+		}
+	}
+}
+
+#if (defined(__i386__) || defined(__x86_64__)) && defined(__F16C__)
+	TEST(FP16_IEEE_FROM_FP32_VALUE, match_hardware) {
+		/* Compare with _mm_cvtps_ph (current rounding direction) for all positive patterns below +infinity */
+		const uint32_t positive_infinity = UINT32_C(0x7F800000);
+		for (uint32_t pattern = UINT32_C(0x00000001); pattern < positive_infinity; pattern++) {
+			float input;
+			memcpy(&input, &pattern, sizeof(input));
+			const uint16_t hardware = uint16_t(_mm_cvtsi128_si32(_mm_cvtps_ph(_mm_set_ss(input), _MM_FROUND_CUR_DIRECTION)));
+			ASSERT_EQ(hardware, fp16_ieee_from_fp32_value(input)) <<
+				std::hex << std::uppercase << std::setfill('0') <<
+				"F32 = 0x" << std::setw(8) << pattern << ", " <<
+				"F16(F32) = 0x" << std::setw(4) << fp16_ieee_from_fp32_value(input) << ", " <<
+				"F16 = 0x" << std::setw(4) << hardware;
+		}
+	}
+#endif
diff --git a/test/ieee-to-fp32-bits.cc b/test/ieee-to-fp32-bits.cc
new file mode 100644
index 0000000..807ef4a
--- /dev/null
+++ b/test/ieee-to-fp32-bits.cc
@@ -0,0 +1,333 @@
+#include <gtest/gtest.h>
+
+#include <cstdint>
+
+#include <fp16.h>
+#include "tables.h"
+
+
+TEST(FP16_IEEE_TO_FP32_BITS, normalized_powers_of_2) {
+	const uint16_t min_po2_f16   = UINT16_C(0x0400); /* 2^-14: exponent field 1, zero mantissa */
+	const uint16_t eighths_f16   = UINT16_C(0x3000);
+	const uint16_t quarter_f16   = UINT16_C(0x3400);
+	const uint16_t half_f16      = UINT16_C(0x3800);
+	const uint16_t one_f16       = UINT16_C(0x3C00);
+	const uint16_t two_f16       = UINT16_C(0x4000);
+	const uint16_t four_f16      = UINT16_C(0x4400);
+	const uint16_t eight_f16     = UINT16_C(0x4800);
+	const uint16_t sixteen_f16   = UINT16_C(0x4C00);
+	const uint16_t thirtytwo_f16 = UINT16_C(0x5000);
+	const uint16_t sixtyfour_f16 = UINT16_C(0x5400);
+	const uint16_t max_po2_f16   = UINT16_C(0x7800); /* 2^15: exponent field 30, zero mantissa */
+	/* Expected single-precision bit patterns for the same powers of two, in the same order */
+	const uint32_t min_po2_f32   = UINT32_C(0x38800000);
+	const uint32_t eighths_f32   = UINT32_C(0x3E000000);
+	const uint32_t quarter_f32   = UINT32_C(0x3E800000);
+	const uint32_t half_f32      = UINT32_C(0x3F000000);
+	const uint32_t one_f32       = UINT32_C(0x3F800000);
+	const uint32_t two_f32       = UINT32_C(0x40000000);
+	const uint32_t four_f32      = UINT32_C(0x40800000);
+	const uint32_t eight_f32     = UINT32_C(0x41000000);
+	const uint32_t sixteen_f32   = UINT32_C(0x41800000);
+	const uint32_t thirtytwo_f32 = UINT32_C(0x42000000);
+	const uint32_t sixtyfour_f32 = UINT32_C(0x42800000);
+	const uint32_t max_po2_f32   = UINT32_C(0x47000000);
+	/* Each half must widen to exactly the paired bits; EXPECT_EQ lets the remaining checks run after a failure */
+	EXPECT_EQ(min_po2_f32, fp16_ieee_to_fp32_bits(min_po2_f16)) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F16 = 0x" << std::setw(4) << min_po2_f16 << ", " <<
+		"F32(F16) = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(min_po2_f16) << ", " <<
+		"F32 = 0x" << std::setw(8) << min_po2_f32;
+
+	EXPECT_EQ(eighths_f32, fp16_ieee_to_fp32_bits(eighths_f16)) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F16 = 0x" << std::setw(4) << eighths_f16 << ", " <<
+		"F32(F16) = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(eighths_f16) << ", " <<
+		"F32 = 0x" << std::setw(8) << eighths_f32;
+
+	EXPECT_EQ(quarter_f32, fp16_ieee_to_fp32_bits(quarter_f16)) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F16 = 0x" << std::setw(4) << quarter_f16 << ", " <<
+		"F32(F16) = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(quarter_f16) << ", " <<
+		"F32 = 0x" << std::setw(8) << quarter_f32;
+
+	EXPECT_EQ(half_f32, fp16_ieee_to_fp32_bits(half_f16)) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F16 = 0x" << std::setw(4) << half_f16 << ", " <<
+		"F32(F16) = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(half_f16) << ", " <<
+		"F32 = 0x" << std::setw(8) << half_f32;
+
+	EXPECT_EQ(one_f32, fp16_ieee_to_fp32_bits(one_f16)) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F16 = 0x" << std::setw(4) << one_f16 << ", " <<
+		"F32(F16) = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(one_f16) << ", " <<
+		"F32 = 0x" << std::setw(8) << one_f32;
+
+	EXPECT_EQ(two_f32, fp16_ieee_to_fp32_bits(two_f16)) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F16 = 0x" << std::setw(4) << two_f16 << ", " <<
+		"F32(F16) = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(two_f16) << ", " <<
+		"F32 = 0x" << std::setw(8) << two_f32;
+
+	EXPECT_EQ(four_f32, fp16_ieee_to_fp32_bits(four_f16)) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F16 = 0x" << std::setw(4) << four_f16 << ", " <<
+		"F32(F16) = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(four_f16) << ", " <<
+		"F32 = 0x" << std::setw(8) << four_f32;
+
+	EXPECT_EQ(eight_f32, fp16_ieee_to_fp32_bits(eight_f16)) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F16 = 0x" << std::setw(4) << eight_f16 << ", " <<
+		"F32(F16) = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(eight_f16) << ", " <<
+		"F32 = 0x" << std::setw(8) << eight_f32;
+
+	EXPECT_EQ(sixteen_f32, fp16_ieee_to_fp32_bits(sixteen_f16)) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F16 = 0x" << std::setw(4) << sixteen_f16 << ", " <<
+		"F32(F16) = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(sixteen_f16) << ", " <<
+		"F32 = 0x" << std::setw(8) << sixteen_f32;
+
+	EXPECT_EQ(thirtytwo_f32, fp16_ieee_to_fp32_bits(thirtytwo_f16)) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F16 = 0x" << std::setw(4) << thirtytwo_f16 << ", " <<
+		"F32(F16) = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(thirtytwo_f16) << ", " <<
+		"F32 = 0x" << std::setw(8) << thirtytwo_f32;
+
+	EXPECT_EQ(sixtyfour_f32, fp16_ieee_to_fp32_bits(sixtyfour_f16)) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F16 = 0x" << std::setw(4) << sixtyfour_f16 << ", " <<
+		"F32(F16) = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(sixtyfour_f16) << ", " <<
+		"F32 = 0x" << std::setw(8) << sixtyfour_f32;
+
+	EXPECT_EQ(max_po2_f32, fp16_ieee_to_fp32_bits(max_po2_f16)) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F16 = 0x" << std::setw(4) << max_po2_f16 << ", " <<
+		"F32(F16) = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(max_po2_f16) << ", " <<
+		"F32 = 0x" << std::setw(8) << max_po2_f32;
+}
+
+TEST(FP16_IEEE_TO_FP32_BITS, denormalized_powers_of_2) {
+	const uint16_t exp2_minus_15_f16 = UINT16_C(0x0200); /* zero exponent field, single mantissa bit set */
+	const uint16_t exp2_minus_16_f16 = UINT16_C(0x0100);
+	const uint16_t exp2_minus_17_f16 = UINT16_C(0x0080);
+	const uint16_t exp2_minus_18_f16 = UINT16_C(0x0040);
+	const uint16_t exp2_minus_19_f16 = UINT16_C(0x0020);
+	const uint16_t exp2_minus_20_f16 = UINT16_C(0x0010);
+	const uint16_t exp2_minus_21_f16 = UINT16_C(0x0008);
+	const uint16_t exp2_minus_22_f16 = UINT16_C(0x0004);
+	const uint16_t exp2_minus_23_f16 = UINT16_C(0x0002);
+	const uint16_t exp2_minus_24_f16 = UINT16_C(0x0001);
+	/* Expected single-precision bit patterns: the same powers of two with a zero F32 mantissa */
+	const uint32_t exp2_minus_15_f32 = UINT32_C(0x38000000);
+	const uint32_t exp2_minus_16_f32 = UINT32_C(0x37800000);
+	const uint32_t exp2_minus_17_f32 = UINT32_C(0x37000000);
+	const uint32_t exp2_minus_18_f32 = UINT32_C(0x36800000);
+	const uint32_t exp2_minus_19_f32 = UINT32_C(0x36000000);
+	const uint32_t exp2_minus_20_f32 = UINT32_C(0x35800000);
+	const uint32_t exp2_minus_21_f32 = UINT32_C(0x35000000);
+	const uint32_t exp2_minus_22_f32 = UINT32_C(0x34800000);
+	const uint32_t exp2_minus_23_f32 = UINT32_C(0x34000000);
+	const uint32_t exp2_minus_24_f32 = UINT32_C(0x33800000);
+	/* Each denormal half must widen to exactly the paired bits; EXPECT_EQ keeps checking after a failure */
+	EXPECT_EQ(exp2_minus_15_f32, fp16_ieee_to_fp32_bits(exp2_minus_15_f16)) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F16 = 0x" << std::setw(4) << exp2_minus_15_f16 << ", " <<
+		"F32(F16) = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(exp2_minus_15_f16) << ", " <<
+		"F32 = 0x" << std::setw(8) << exp2_minus_15_f32;
+
+	EXPECT_EQ(exp2_minus_16_f32, fp16_ieee_to_fp32_bits(exp2_minus_16_f16)) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F16 = 0x" << std::setw(4) << exp2_minus_16_f16 << ", " <<
+		"F32(F16) = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(exp2_minus_16_f16) << ", " <<
+		"F32 = 0x" << std::setw(8) << exp2_minus_16_f32;
+
+	EXPECT_EQ(exp2_minus_17_f32, fp16_ieee_to_fp32_bits(exp2_minus_17_f16)) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F16 = 0x" << std::setw(4) << exp2_minus_17_f16 << ", " <<
+		"F32(F16) = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(exp2_minus_17_f16) << ", " <<
+		"F32 = 0x" << std::setw(8) << exp2_minus_17_f32;
+
+	EXPECT_EQ(exp2_minus_18_f32, fp16_ieee_to_fp32_bits(exp2_minus_18_f16)) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F16 = 0x" << std::setw(4) << exp2_minus_18_f16 << ", " <<
+		"F32(F16) = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(exp2_minus_18_f16) << ", " <<
+		"F32 = 0x" << std::setw(8) << exp2_minus_18_f32;
+
+	EXPECT_EQ(exp2_minus_19_f32, fp16_ieee_to_fp32_bits(exp2_minus_19_f16)) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F16 = 0x" << std::setw(4) << exp2_minus_19_f16 << ", " <<
+		"F32(F16) = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(exp2_minus_19_f16) << ", " <<
+		"F32 = 0x" << std::setw(8) << exp2_minus_19_f32;
+
+	EXPECT_EQ(exp2_minus_20_f32, fp16_ieee_to_fp32_bits(exp2_minus_20_f16)) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F16 = 0x" << std::setw(4) << exp2_minus_20_f16 << ", " <<
+		"F32(F16) = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(exp2_minus_20_f16) << ", " <<
+		"F32 = 0x" << std::setw(8) << exp2_minus_20_f32;
+
+	EXPECT_EQ(exp2_minus_21_f32, fp16_ieee_to_fp32_bits(exp2_minus_21_f16)) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F16 = 0x" << std::setw(4) << exp2_minus_21_f16 << ", " <<
+		"F32(F16) = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(exp2_minus_21_f16) << ", " <<
+		"F32 = 0x" << std::setw(8) << exp2_minus_21_f32;
+
+	EXPECT_EQ(exp2_minus_22_f32, fp16_ieee_to_fp32_bits(exp2_minus_22_f16)) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F16 = 0x" << std::setw(4) << exp2_minus_22_f16 << ", " <<
+		"F32(F16) = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(exp2_minus_22_f16) << ", " <<
+		"F32 = 0x" << std::setw(8) << exp2_minus_22_f32;
+
+	EXPECT_EQ(exp2_minus_23_f32, fp16_ieee_to_fp32_bits(exp2_minus_23_f16)) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F16 = 0x" << std::setw(4) << exp2_minus_23_f16 << ", " <<
+		"F32(F16) = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(exp2_minus_23_f16) << ", " <<
+		"F32 = 0x" << std::setw(8) << exp2_minus_23_f32;
+
+	EXPECT_EQ(exp2_minus_24_f32, fp16_ieee_to_fp32_bits(exp2_minus_24_f16)) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F16 = 0x" << std::setw(4) << exp2_minus_24_f16 << ", " <<
+		"F32(F16) = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(exp2_minus_24_f16) << ", " <<
+		"F32 = 0x" << std::setw(8) << exp2_minus_24_f32;
+}
+
+TEST(FP16_IEEE_TO_FP32_BITS, zero) {
+	/* +0 (0x0000) and -0 (0x8000) must widen to the F32 zeros carrying the same sign bit */
+	const uint16_t plus_zero_half = UINT16_C(0x0000), minus_zero_half = UINT16_C(0x8000);
+	const uint32_t plus_zero_bits = UINT32_C(0x00000000), minus_zero_bits = UINT32_C(0x80000000);
+	const uint32_t plus_zero_actual = fp16_ieee_to_fp32_bits(plus_zero_half);
+	const uint32_t minus_zero_actual = fp16_ieee_to_fp32_bits(minus_zero_half);
+
+	EXPECT_EQ(plus_zero_bits, plus_zero_actual) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F16 = 0x" << std::setw(4) << plus_zero_half << ", " <<
+		"F32(F16) = 0x" << std::setw(8) << plus_zero_actual << ", " <<
+		"F32 = 0x" << std::setw(8) << plus_zero_bits;
+
+	EXPECT_EQ(minus_zero_bits, minus_zero_actual) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F16 = 0x" << std::setw(4) << minus_zero_half << ", " <<
+		"F32(F16) = 0x" << std::setw(8) << minus_zero_actual << ", " <<
+		"F32 = 0x" << std::setw(8) << minus_zero_bits;
+}
+
+TEST(FP16_IEEE_TO_FP32_BITS, infinity) {
+	/* Half infinities (0x7C00, 0xFC00) must widen to the F32 infinities of the same sign */
+	const uint16_t plus_inf_half = UINT16_C(0x7C00), minus_inf_half = UINT16_C(0xFC00);
+	const uint32_t plus_inf_bits = UINT32_C(0x7F800000), minus_inf_bits = UINT32_C(0xFF800000);
+	const uint32_t plus_inf_actual = fp16_ieee_to_fp32_bits(plus_inf_half);
+	const uint32_t minus_inf_actual = fp16_ieee_to_fp32_bits(minus_inf_half);
+
+	EXPECT_EQ(plus_inf_bits, plus_inf_actual) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F16 = 0x" << std::setw(4) << plus_inf_half << ", " <<
+		"F32(F16) = 0x" << std::setw(8) << plus_inf_actual << ", " <<
+		"F32 = 0x" << std::setw(8) << plus_inf_bits;
+
+	EXPECT_EQ(minus_inf_bits, minus_inf_actual) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F16 = 0x" << std::setw(4) << minus_inf_half << ", " <<
+		"F32(F16) = 0x" << std::setw(8) << minus_inf_actual << ", " <<
+		"F32 = 0x" << std::setw(8) << minus_inf_bits;
+}
+
+TEST(FP16_IEEE_TO_FP32_BITS, positive_nan) {
+	/* Every positive half NaN (0x7C01..0x7FFF) must widen to a positive single-precision NaN */
+	for (uint16_t m = UINT16_C(1); m < UINT16_C(0x0400); m++) {
+		const uint16_t nan_f16 = UINT16_C(0x7C00) | m;
+		const uint32_t nan_f32 = fp16_ieee_to_fp32_bits(nan_f16);
+		/* Check sign: must be clear (unsigned zero literal avoids a signed/unsigned comparison) */
+		EXPECT_EQ(nan_f32 & UINT32_C(0x80000000), UINT32_C(0)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << nan_f16 << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << nan_f32;
+
+		/* Check exponent: all eight exponent bits must be set */
+		EXPECT_EQ(nan_f32 & UINT32_C(0x7F800000), UINT32_C(0x7F800000)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << nan_f16 << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << nan_f32;
+
+		/* Check mantissa: must be non-zero, otherwise the result would be infinity */
+		EXPECT_NE(nan_f32 & UINT32_C(0x007FFFFF), UINT32_C(0)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << nan_f16 << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << nan_f32;
+	}
+}
+
+TEST(FP16_IEEE_TO_FP32_BITS, negative_nan) {
+	/* Every negative half NaN (0xFC01..0xFFFF) must widen to a negative single-precision NaN */
+	for (uint16_t mantissa = UINT16_C(1); mantissa < UINT16_C(0x0400); mantissa++) {
+		const uint16_t half = UINT16_C(0xFC00) | mantissa;
+		const uint32_t widened = fp16_ieee_to_fp32_bits(half);
+		/* Sign bit must be set */
+		EXPECT_NE(widened & UINT32_C(0x80000000), UINT32_C(0)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << half << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << widened;
+
+		/* Exponent field must be all ones */
+		EXPECT_EQ(widened & UINT32_C(0x7F800000), UINT32_C(0x7F800000)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << half << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << widened;
+
+		/* Mantissa must be non-zero (a zero mantissa would encode infinity) */
+		EXPECT_NE(widened & UINT32_C(0x007FFFFF), UINT32_C(0)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << half << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << widened;
+	}
+}
+
+TEST(FP16_IEEE_TO_FP32_BITS, positive_normalized_values) {
+	/* Exponents -14..15 times mantissas 0..0x3FF; expected bits are fp16::normalizedValues plus the exponent offset */
+	for (int32_t exponent = -14; exponent <= 15; exponent++) {
+		const uint16_t half_exponent_bits = (uint16_t) ((uint32_t) exponent + 15) << 10;
+		for (uint16_t mantissa = 0; mantissa < 0x0400; mantissa++) {
+			const uint16_t half = mantissa + half_exponent_bits;
+			const uint32_t expected = fp16::normalizedValues[mantissa] + ((uint32_t) exponent << 23);
+			EXPECT_EQ(expected, fp16_ieee_to_fp32_bits(half)) <<
+				std::hex << std::uppercase << std::setfill('0') << "F16 = 0x" << std::setw(4) << half << ", " <<
+				"F32(F16) = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(half) << ", " <<
+				"F32 = 0x" << std::setw(8) << expected;
+		}
+	}
+}
+
+TEST(FP16_IEEE_TO_FP32_BITS, negative_normalized_values) {
+	/* Build each normalized half and its expected F32 pattern, then flip the sign bit of both */
+	for (int32_t exponent = -14; exponent <= 15; exponent++) {
+		const uint16_t half_exponent_bits = (uint16_t) ((uint32_t) exponent + 15) << 10;
+		for (uint16_t mantissa = 0; mantissa < 0x0400; mantissa++) {
+			const uint16_t half = (mantissa + half_exponent_bits) ^ UINT16_C(0x8000);
+			const uint32_t expected = (fp16::normalizedValues[mantissa] + ((uint32_t) exponent << 23)) ^ UINT32_C(0x80000000);
+			EXPECT_EQ(expected, fp16_ieee_to_fp32_bits(half)) <<
+				std::hex << std::uppercase << std::setfill('0') << "F16 = 0x" << std::setw(4) << half << ", " <<
+				"F32(F16) = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(half) << ", " <<
+				"F32 = 0x" << std::setw(8) << expected;
+		}
+	}
+}
+
+TEST(FP16_IEEE_TO_FP32_BITS, positive_denormalized_values) {
+	/* Halves 0x0000..0x03FF (zero exponent field) are checked against fp16::denormalizedValues */
+	for (uint16_t half = 0; half < 0x0400; half++) {
+		const uint32_t expected = fp16::denormalizedValues[half];
+		EXPECT_EQ(expected, fp16_ieee_to_fp32_bits(half)) <<
+			std::hex << std::uppercase << std::setfill('0') << "F16 = 0x" << std::setw(4) << half << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(half) << ", " << "F32 = 0x" << std::setw(8) << expected;
+	}
+}
+
+TEST(FP16_IEEE_TO_FP32_BITS, negative_denormalized_values) {
+	/* Flip the sign bit of both the half input and the tabulated F32 value before comparing */
+	for (uint16_t magnitude = 0; magnitude < 0x0400; magnitude++) {
+		const uint16_t half = magnitude ^ UINT16_C(0x8000);
+		const uint32_t expected = fp16::denormalizedValues[magnitude] ^ UINT32_C(0x80000000);
+		EXPECT_EQ(expected, fp16_ieee_to_fp32_bits(half)) <<
+			std::hex << std::uppercase << std::setfill('0') << "F16 = 0x" << std::setw(4) << half << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(half) << ", " <<
+			"F32 = 0x" << std::setw(8) << expected;
+	}
+}
diff --git a/test/ieee-to-fp32-psimd.cc b/test/ieee-to-fp32-psimd.cc
new file mode 100644
index 0000000..6cd2ac2
--- /dev/null
+++ b/test/ieee-to-fp32-psimd.cc
@@ -0,0 +1,312 @@
+#include <gtest/gtest.h>
+
+#include <cstdint>
+
+#include <fp16.h>
+#include <fp16/psimd.h>
+
+
+TEST(FP16_IEEE_TO_FP32_PSIMD, infinity) {
+	/* Lanes 0 and 3 hold +infinity, lanes 1 and 2 hold -infinity; each lane must widen exactly */
+	const uint16_t plus_inf_half = UINT16_C(0x7C00);
+	const uint16_t minus_inf_half = UINT16_C(0xFC00);
+	const uint32_t plus_inf_bits = UINT32_C(0x7F800000);
+	const uint32_t minus_inf_bits = UINT32_C(0xFF800000);
+
+	const psimd_u16 halves = {
+		plus_inf_half, minus_inf_half,
+		minus_inf_half, plus_inf_half
+	};
+	const psimd_u32 singles = (psimd_u32) fp16_ieee_to_fp32_psimd(halves);
+
+	EXPECT_EQ(plus_inf_bits, singles[0]) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F16 = 0x" << std::setw(4) << halves[0] << ", " <<
+		"F32(F16) = 0x" << std::setw(8) << singles[0] << ", " <<
+		"F32 = 0x" << std::setw(8) << plus_inf_bits;
+	EXPECT_EQ(minus_inf_bits, singles[1]) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F16 = 0x" << std::setw(4) << halves[1] << ", " <<
+		"F32(F16) = 0x" << std::setw(8) << singles[1] << ", " <<
+		"F32 = 0x" << std::setw(8) << minus_inf_bits;
+	EXPECT_EQ(minus_inf_bits, singles[2]) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F16 = 0x" << std::setw(4) << halves[2] << ", " <<
+		"F32(F16) = 0x" << std::setw(8) << singles[2] << ", " <<
+		"F32 = 0x" << std::setw(8) << minus_inf_bits;
+	EXPECT_EQ(plus_inf_bits, singles[3]) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F16 = 0x" << std::setw(4) << halves[3] << ", " <<
+		"F32(F16) = 0x" << std::setw(8) << singles[3] << ", " <<
+		"F32 = 0x" << std::setw(8) << plus_inf_bits;
+}
+
+TEST(FP16_IEEE_TO_FP32_PSIMD, positive_nan) {
+	for (uint16_t h = 0; h < 0x0400; h += 4) {
+		const psimd_u16 fp16 = {
+			(uint16_t) (h + 0x7C00 + (h == 0)) /* Avoid infinity */,
+			(uint16_t) (h + 0x7C01),
+			(uint16_t) (h + 0x7C02),
+			(uint16_t) (h + 0x7C03),
+		};
+		const psimd_u32 fp32 = (psimd_u32) fp16_ieee_to_fp32_psimd(fp16);
+		/* Four positive half NaNs per iteration; each output lane must be a positive F32 NaN */
+		/* Check sign: must be clear (unsigned zero literal avoids a signed/unsigned comparison) */
+		EXPECT_EQ(fp32[0] & UINT32_C(0x80000000), UINT32_C(0)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[0] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32[0];
+		EXPECT_EQ(fp32[1] & UINT32_C(0x80000000), UINT32_C(0)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[1] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32[1];
+		EXPECT_EQ(fp32[2] & UINT32_C(0x80000000), UINT32_C(0)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[2] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32[2];
+		EXPECT_EQ(fp32[3] & UINT32_C(0x80000000), UINT32_C(0)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[3] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32[3];
+
+		/* Check exponent: all eight exponent bits must be set */
+		EXPECT_EQ(fp32[0] & UINT32_C(0x7F800000), UINT32_C(0x7F800000)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[0] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32[0];
+		EXPECT_EQ(fp32[1] & UINT32_C(0x7F800000), UINT32_C(0x7F800000)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[1] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32[1];
+		EXPECT_EQ(fp32[2] & UINT32_C(0x7F800000), UINT32_C(0x7F800000)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[2] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32[2];
+		EXPECT_EQ(fp32[3] & UINT32_C(0x7F800000), UINT32_C(0x7F800000)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[3] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32[3];
+
+		/* Check mantissa: must be non-zero, otherwise the lane would hold infinity */
+		EXPECT_NE(fp32[0] & UINT32_C(0x007FFFFF), UINT32_C(0)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[0] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32[0];
+		EXPECT_NE(fp32[1] & UINT32_C(0x007FFFFF), UINT32_C(0)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[1] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32[1];
+		EXPECT_NE(fp32[2] & UINT32_C(0x007FFFFF), UINT32_C(0)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[2] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32[2];
+		EXPECT_NE(fp32[3] & UINT32_C(0x007FFFFF), UINT32_C(0)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[3] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32[3];
+	}
+}
+
+TEST(FP16_IEEE_TO_FP32_PSIMD, negative_nan) {
+	for (uint16_t base = 0; base < 0x0400; base += 4) {
+		const psimd_u16 halves = {
+			(uint16_t) (base + 0xFC00 + (base == 0)) /* Avoid infinity */,
+			(uint16_t) (base + 0xFC01),
+			(uint16_t) (base + 0xFC02),
+			(uint16_t) (base + 0xFC03),
+		};
+		const psimd_u32 singles = (psimd_u32) fp16_ieee_to_fp32_psimd(halves);
+		/* Four negative half NaNs per iteration; each output lane must be a negative F32 NaN */
+		/* Sign bit must be set in every lane */
+		EXPECT_EQ(singles[0] & UINT32_C(0x80000000), UINT32_C(0x80000000)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << halves[0] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << singles[0];
+		EXPECT_EQ(singles[1] & UINT32_C(0x80000000), UINT32_C(0x80000000)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << halves[1] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << singles[1];
+		EXPECT_EQ(singles[2] & UINT32_C(0x80000000), UINT32_C(0x80000000)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << halves[2] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << singles[2];
+		EXPECT_EQ(singles[3] & UINT32_C(0x80000000), UINT32_C(0x80000000)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << halves[3] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << singles[3];
+
+		/* Exponent field must be all ones in every lane */
+		EXPECT_EQ(singles[0] & UINT32_C(0x7F800000), UINT32_C(0x7F800000)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << halves[0] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << singles[0];
+		EXPECT_EQ(singles[1] & UINT32_C(0x7F800000), UINT32_C(0x7F800000)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << halves[1] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << singles[1];
+		EXPECT_EQ(singles[2] & UINT32_C(0x7F800000), UINT32_C(0x7F800000)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << halves[2] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << singles[2];
+		EXPECT_EQ(singles[3] & UINT32_C(0x7F800000), UINT32_C(0x7F800000)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << halves[3] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << singles[3];
+
+		/* Mantissa must be non-zero in every lane (zero would encode infinity) */
+		EXPECT_NE(singles[0] & UINT32_C(0x007FFFFF), UINT32_C(0)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << halves[0] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << singles[0];
+		EXPECT_NE(singles[1] & UINT32_C(0x007FFFFF), UINT32_C(0)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << halves[1] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << singles[1];
+		EXPECT_NE(singles[2] & UINT32_C(0x007FFFFF), UINT32_C(0)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << halves[2] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << singles[2];
+		EXPECT_NE(singles[3] & UINT32_C(0x007FFFFF), UINT32_C(0)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << halves[3] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << singles[3];
+	}
+}
+
+TEST(FP16_IEEE_TO_FP32_PSIMD, positive_normalized_values) {
+	/* The vector conversion must agree lane by lane with the scalar fp16_ieee_to_fp32_bits */
+	for (int32_t exponent = -14; exponent <= 15; exponent++) {
+		const uint32_t exponent_bits = ((uint32_t) exponent + 15) << 10;
+		for (uint16_t mantissa = 0; mantissa < 0x0400; mantissa += 4) {
+			const psimd_u16 halves = {
+				(uint16_t) (mantissa + exponent_bits + 0),
+				(uint16_t) (mantissa + exponent_bits + 1),
+				(uint16_t) (mantissa + exponent_bits + 2),
+				(uint16_t) (mantissa + exponent_bits + 3),
+			};
+			const psimd_u32 singles = (psimd_u32) fp16_ieee_to_fp32_psimd(halves);
+			EXPECT_EQ(fp16_ieee_to_fp32_bits(halves[0]), singles[0]) <<
+				std::hex << std::uppercase << std::setfill('0') <<
+				"F16 = 0x" << std::setw(4) << halves[0] << ", " <<
+				"F32(F16) = 0x" << std::setw(8) << singles[0] << ", " <<
+				"F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(halves[0]);
+			EXPECT_EQ(fp16_ieee_to_fp32_bits(halves[1]), singles[1]) <<
+				std::hex << std::uppercase << std::setfill('0') <<
+				"F16 = 0x" << std::setw(4) << halves[1] << ", " <<
+				"F32(F16) = 0x" << std::setw(8) << singles[1] << ", " <<
+				"F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(halves[1]);
+			EXPECT_EQ(fp16_ieee_to_fp32_bits(halves[2]), singles[2]) <<
+				std::hex << std::uppercase << std::setfill('0') <<
+				"F16 = 0x" << std::setw(4) << halves[2] << ", " <<
+				"F32(F16) = 0x" << std::setw(8) << singles[2] << ", " <<
+				"F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(halves[2]);
+			EXPECT_EQ(fp16_ieee_to_fp32_bits(halves[3]), singles[3]) <<
+				std::hex << std::uppercase << std::setfill('0') <<
+				"F16 = 0x" << std::setw(4) << halves[3] << ", " <<
+				"F32(F16) = 0x" << std::setw(8) << singles[3] << ", " <<
+				"F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(halves[3]);
+		}
+	}
+}
+
+TEST(FP16_IEEE_TO_FP32_PSIMD, negative_normalized_values) {
+	/*
+	 * Walk every negative normalized half-precision encoding (sign bit
+	 * 0x8000 set), packing four consecutive mantissa values into one vector
+	 * per iteration, and require each converted lane to equal the bit
+	 * pattern returned by the scalar fp16_ieee_to_fp32_bits conversion.
+	 */
+	const uint32_t exponentBias = 15;
+	for (int32_t e = -14; e <= 15; e++) {
+		/* Biased exponent, shifted above the 10 mantissa bits. */
+		const uint32_t exponentBits = (e + exponentBias) << 10;
+		for (uint16_t h = 0; h < 0x0400; h += 4) {
+			/* 0x8000 sets the sign; the low two bits select the lane's mantissa. */
+			const psimd_u16 fp16 = {
+				(uint16_t) (h + exponentBits + 0x8000),
+				(uint16_t) (h + exponentBits + 0x8001),
+				(uint16_t) (h + exponentBits + 0x8002),
+				(uint16_t) (h + exponentBits + 0x8003),
+			};
+			const psimd_u32 fp32 = (psimd_u32) fp16_ieee_to_fp32_psimd(fp16);
+
+			/* Check each of the four converted lanes against the scalar path. */
+			for (int lane = 0; lane < 4; lane++) {
+				const uint16_t input = fp16[lane];
+				const uint32_t expected = fp16_ieee_to_fp32_bits(input);
+				const uint32_t actual = fp32[lane];
+				EXPECT_EQ(expected, actual) <<
+					std::hex << std::uppercase << std::setfill('0') <<
+					"F16 = 0x" << std::setw(4) << input << ", " <<
+					"F32(F16) = 0x" << std::setw(8) << actual << ", " <<
+					"F32 = 0x" << std::setw(8) << expected;
+			}
+		}
+	}
+}
+
+TEST(FP16_IEEE_TO_FP32_PSIMD, positive_denormalized_values) {
+	/*
+	 * Encodings 0x0000..0x03FF have a clear sign bit and a zero exponent
+	 * field (positive zero and the positive denormals). Convert them four
+	 * at a time and require each lane to equal the bit pattern returned by
+	 * the scalar fp16_ieee_to_fp32_bits conversion of the same input.
+	 */
+	for (uint16_t h = 0; h < 0x0400; h += 4) {
+		/* Four consecutive encodings starting at h. */
+		const psimd_u16 fp16 = {
+			(uint16_t) (h + 0),
+			(uint16_t) (h + 1),
+			(uint16_t) (h + 2),
+			(uint16_t) (h + 3),
+		};
+		/* Vector conversion under test, reinterpreted as raw 32-bit lanes. */
+		const psimd_u32 fp32 = (psimd_u32) fp16_ieee_to_fp32_psimd(fp16);
+
+		/* Check each of the four converted lanes against the scalar path. */
+		for (int lane = 0; lane < 4; lane++) {
+			const uint16_t input = fp16[lane];
+			/* Scalar conversion supplies the reference bit pattern. */
+			const uint32_t expected = fp16_ieee_to_fp32_bits(input);
+			const uint32_t actual = fp32[lane];
+			EXPECT_EQ(expected, actual) <<
+				std::hex << std::uppercase << std::setfill('0') <<
+				"F16 = 0x" << std::setw(4) << input << ", " <<
+				"F32(F16) = 0x" << std::setw(8) << actual << ", " <<
+				"F32 = 0x" << std::setw(8) << expected;
+		}
+	}
+}
+
+TEST(FP16_IEEE_TO_FP32_PSIMD, negative_denormalized_values) {
+	/*
+	 * Encodings 0x8000..0x83FF have the sign bit set and a zero exponent
+	 * field (negative zero and the negative denormals). Convert them four
+	 * at a time and require each lane to equal the bit pattern returned by
+	 * the scalar fp16_ieee_to_fp32_bits conversion of the same input.
+	 */
+	for (uint16_t h = 0; h < 0x0400; h += 4) {
+		/* Four consecutive encodings starting at h, each with the sign bit set. */
+		const psimd_u16 fp16 = {
+			(uint16_t) (h + 0x8000),
+			(uint16_t) (h + 0x8001),
+			(uint16_t) (h + 0x8002),
+			(uint16_t) (h + 0x8003),
+		};
+		/* Vector conversion under test, reinterpreted as raw 32-bit lanes. */
+		const psimd_u32 fp32 = (psimd_u32) fp16_ieee_to_fp32_psimd(fp16);
+
+		/* Check each of the four converted lanes against the scalar path. */
+		for (int lane = 0; lane < 4; lane++) {
+			const uint16_t input = fp16[lane];
+			/* Scalar conversion supplies the reference bit pattern. */
+			const uint32_t expected = fp16_ieee_to_fp32_bits(input);
+			const uint32_t actual = fp32[lane];
+			EXPECT_EQ(expected, actual) <<
+				std::hex << std::uppercase << std::setfill('0') <<
+				"F16 = 0x" << std::setw(4) << input << ", " <<
+				"F32(F16) = 0x" << std::setw(8) << actual << ", " <<
+				"F32 = 0x" << std::setw(8) << expected;
+		}
+	}
+}
diff --git a/test/ieee-to-fp32-value.cc b/test/ieee-to-fp32-value.cc
new file mode 100644
index 0000000..d69faae
--- /dev/null
+++ b/test/ieee-to-fp32-value.cc
@@ -0,0 +1,420 @@
+#include <gtest/gtest.h>
+
+#include <cstdint>
+#include <cmath>
+
+#include <fp16.h>
+#include "tables.h"
+
+
+TEST(FP16_IEEE_TO_FP32_VALUE, normalized_powers_of_2) {
+	const uint16_t min_po2_f16   = UINT16_C(0x0400); /* 2^-14: exponent field 1, zero mantissa */
+	const uint16_t eighths_f16   = UINT16_C(0x3000); /* 2^-3 */
+	const uint16_t quarter_f16   = UINT16_C(0x3400); /* 2^-2 */
+	const uint16_t half_f16      = UINT16_C(0x3800); /* 2^-1 */
+	const uint16_t one_f16       = UINT16_C(0x3C00); /* 2^0 */
+	const uint16_t two_f16       = UINT16_C(0x4000); /* 2^1 */
+	const uint16_t four_f16      = UINT16_C(0x4400); /* 2^2 */
+	const uint16_t eight_f16     = UINT16_C(0x4800); /* 2^3 */
+	const uint16_t sixteen_f16   = UINT16_C(0x4C00); /* 2^4 */
+	const uint16_t thirtytwo_f16 = UINT16_C(0x5000); /* 2^5 */
+	const uint16_t sixtyfour_f16 = UINT16_C(0x5400); /* 2^6 */
+	const uint16_t max_po2_f16   = UINT16_C(0x7800); /* 2^15: exponent field 30, zero mantissa */
+	/* Bit patterns the same powers of two must have as single-precision floats. */
+	const uint32_t min_po2_f32   = UINT32_C(0x38800000);
+	const uint32_t eighths_f32   = UINT32_C(0x3E000000);
+	const uint32_t quarter_f32   = UINT32_C(0x3E800000);
+	const uint32_t half_f32      = UINT32_C(0x3F000000);
+	const uint32_t one_f32       = UINT32_C(0x3F800000);
+	const uint32_t two_f32       = UINT32_C(0x40000000);
+	const uint32_t four_f32      = UINT32_C(0x40800000);
+	const uint32_t eight_f32     = UINT32_C(0x41000000);
+	const uint32_t sixteen_f32   = UINT32_C(0x41800000);
+	const uint32_t thirtytwo_f32 = UINT32_C(0x42000000);
+	const uint32_t sixtyfour_f32 = UINT32_C(0x42800000);
+	const uint32_t max_po2_f32   = UINT32_C(0x47000000);
+	/* Per pair: convert the half, copy the float's raw bits out with memcpy, compare with the expected pattern. */
+	const float min_po2_value = fp16_ieee_to_fp32_value(min_po2_f16);
+	uint32_t min_po2_bits;
+	memcpy(&min_po2_bits, &min_po2_value, sizeof(min_po2_bits));
+	EXPECT_EQ(min_po2_f32, min_po2_bits) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F16 = 0x" << std::setw(4) << min_po2_f16 << ", " <<
+		"F32(F16) = 0x" << std::setw(8) << min_po2_bits << ", " <<
+		"F32 = 0x" << std::setw(8) << min_po2_f32;
+
+	const float eighths_value = fp16_ieee_to_fp32_value(eighths_f16);
+	uint32_t eighths_bits;
+	memcpy(&eighths_bits, &eighths_value, sizeof(eighths_bits));
+	EXPECT_EQ(eighths_f32, eighths_bits) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F16 = 0x" << std::setw(4) << eighths_f16 << ", " <<
+		"F32(F16) = 0x" << std::setw(8) << eighths_bits << ", " <<
+		"F32 = 0x" << std::setw(8) << eighths_f32;
+
+	const float quarter_value = fp16_ieee_to_fp32_value(quarter_f16);
+	uint32_t quarter_bits;
+	memcpy(&quarter_bits, &quarter_value, sizeof(quarter_bits));
+	EXPECT_EQ(quarter_f32, quarter_bits) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F16 = 0x" << std::setw(4) << quarter_f16 << ", " <<
+		"F32(F16) = 0x" << std::setw(8) << quarter_bits << ", " <<
+		"F32 = 0x" << std::setw(8) << quarter_f32;
+
+	const float half_value = fp16_ieee_to_fp32_value(half_f16);
+	uint32_t half_bits;
+	memcpy(&half_bits, &half_value, sizeof(half_bits));
+	EXPECT_EQ(half_f32, half_bits) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F16 = 0x" << std::setw(4) << half_f16 << ", " <<
+		"F32(F16) = 0x" << std::setw(8) << half_bits << ", " <<
+		"F32 = 0x" << std::setw(8) << half_f32;
+
+	const float one_value = fp16_ieee_to_fp32_value(one_f16);
+	uint32_t one_bits;
+	memcpy(&one_bits, &one_value, sizeof(one_bits));
+	EXPECT_EQ(one_f32, one_bits) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F16 = 0x" << std::setw(4) << one_f16 << ", " <<
+		"F32(F16) = 0x" << std::setw(8) << one_bits << ", " <<
+		"F32 = 0x" << std::setw(8) << one_f32;
+
+	const float two_value = fp16_ieee_to_fp32_value(two_f16);
+	uint32_t two_bits;
+	memcpy(&two_bits, &two_value, sizeof(two_bits));
+	EXPECT_EQ(two_f32, two_bits) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F16 = 0x" << std::setw(4) << two_f16 << ", " <<
+		"F32(F16) = 0x" << std::setw(8) << two_bits << ", " <<
+		"F32 = 0x" << std::setw(8) << two_f32;
+
+	const float four_value = fp16_ieee_to_fp32_value(four_f16);
+	uint32_t four_bits;
+	memcpy(&four_bits, &four_value, sizeof(four_bits));
+	EXPECT_EQ(four_f32, four_bits) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F16 = 0x" << std::setw(4) << four_f16 << ", " <<
+		"F32(F16) = 0x" << std::setw(8) << four_bits << ", " <<
+		"F32 = 0x" << std::setw(8) << four_f32;
+
+	const float eight_value = fp16_ieee_to_fp32_value(eight_f16);
+	uint32_t eight_bits;
+	memcpy(&eight_bits, &eight_value, sizeof(eight_bits));
+	EXPECT_EQ(eight_f32, eight_bits) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F16 = 0x" << std::setw(4) << eight_f16 << ", " <<
+		"F32(F16) = 0x" << std::setw(8) << eight_bits << ", " <<
+		"F32 = 0x" << std::setw(8) << eight_f32;
+
+	const float sixteen_value = fp16_ieee_to_fp32_value(sixteen_f16);
+	uint32_t sixteen_bits;
+	memcpy(&sixteen_bits, &sixteen_value, sizeof(sixteen_bits));
+	EXPECT_EQ(sixteen_f32, sixteen_bits) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F16 = 0x" << std::setw(4) << sixteen_f16 << ", " <<
+		"F32(F16) = 0x" << std::setw(8) << sixteen_bits << ", " <<
+		"F32 = 0x" << std::setw(8) << sixteen_f32;
+
+	const float thirtytwo_value = fp16_ieee_to_fp32_value(thirtytwo_f16);
+	uint32_t thirtytwo_bits;
+	memcpy(&thirtytwo_bits, &thirtytwo_value, sizeof(thirtytwo_bits));
+	EXPECT_EQ(thirtytwo_f32, thirtytwo_bits) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F16 = 0x" << std::setw(4) << thirtytwo_f16 << ", " <<
+		"F32(F16) = 0x" << std::setw(8) << thirtytwo_bits << ", " <<
+		"F32 = 0x" << std::setw(8) << thirtytwo_f32;
+
+	const float sixtyfour_value = fp16_ieee_to_fp32_value(sixtyfour_f16);
+	uint32_t sixtyfour_bits;
+	memcpy(&sixtyfour_bits, &sixtyfour_value, sizeof(sixtyfour_bits));
+	EXPECT_EQ(sixtyfour_f32, sixtyfour_bits) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F16 = 0x" << std::setw(4) << sixtyfour_f16 << ", " <<
+		"F32(F16) = 0x" << std::setw(8) << sixtyfour_bits << ", " <<
+		"F32 = 0x" << std::setw(8) << sixtyfour_f32;
+
+	const float max_po2_value = fp16_ieee_to_fp32_value(max_po2_f16);
+	uint32_t max_po2_bits;
+	memcpy(&max_po2_bits, &max_po2_value, sizeof(max_po2_bits));
+	EXPECT_EQ(max_po2_f32, max_po2_bits) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F16 = 0x" << std::setw(4) << max_po2_f16 << ", " <<
+		"F32(F16) = 0x" << std::setw(8) << max_po2_bits << ", " <<
+		"F32 = 0x" << std::setw(8) << max_po2_f32;
+}
+
+TEST(FP16_IEEE_TO_FP32_VALUE, denormalized_powers_of_2) {
+	const uint16_t exp2_minus_15_f16 = UINT16_C(0x0200); /* zero exponent field, only mantissa bit 9 set */
+	const uint16_t exp2_minus_16_f16 = UINT16_C(0x0100);
+	const uint16_t exp2_minus_17_f16 = UINT16_C(0x0080);
+	const uint16_t exp2_minus_18_f16 = UINT16_C(0x0040);
+	const uint16_t exp2_minus_19_f16 = UINT16_C(0x0020);
+	const uint16_t exp2_minus_20_f16 = UINT16_C(0x0010);
+	const uint16_t exp2_minus_21_f16 = UINT16_C(0x0008);
+	const uint16_t exp2_minus_22_f16 = UINT16_C(0x0004);
+	const uint16_t exp2_minus_23_f16 = UINT16_C(0x0002);
+	const uint16_t exp2_minus_24_f16 = UINT16_C(0x0001); /* zero exponent field, only mantissa bit 0 set */
+	/* Expected single-precision patterns: zero mantissa, exponent field dropping by one per step. */
+	const uint32_t exp2_minus_15_f32 = UINT32_C(0x38000000);
+	const uint32_t exp2_minus_16_f32 = UINT32_C(0x37800000);
+	const uint32_t exp2_minus_17_f32 = UINT32_C(0x37000000);
+	const uint32_t exp2_minus_18_f32 = UINT32_C(0x36800000);
+	const uint32_t exp2_minus_19_f32 = UINT32_C(0x36000000);
+	const uint32_t exp2_minus_20_f32 = UINT32_C(0x35800000);
+	const uint32_t exp2_minus_21_f32 = UINT32_C(0x35000000);
+	const uint32_t exp2_minus_22_f32 = UINT32_C(0x34800000);
+	const uint32_t exp2_minus_23_f32 = UINT32_C(0x34000000);
+	const uint32_t exp2_minus_24_f32 = UINT32_C(0x33800000);
+	/* Per pair: convert the half, copy the float's raw bits out with memcpy, compare with the expected pattern. */
+	const float exp2_minus_15_value = fp16_ieee_to_fp32_value(exp2_minus_15_f16);
+	uint32_t exp2_minus_15_bits;
+	memcpy(&exp2_minus_15_bits, &exp2_minus_15_value, sizeof(exp2_minus_15_bits));
+	EXPECT_EQ(exp2_minus_15_f32, exp2_minus_15_bits) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F16 = 0x" << std::setw(4) << exp2_minus_15_f16 << ", " <<
+		"F32(F16) = 0x" << std::setw(8) << exp2_minus_15_bits << ", " <<
+		"F32 = 0x" << std::setw(8) << exp2_minus_15_f32;
+
+	const float exp2_minus_16_value = fp16_ieee_to_fp32_value(exp2_minus_16_f16);
+	uint32_t exp2_minus_16_bits;
+	memcpy(&exp2_minus_16_bits, &exp2_minus_16_value, sizeof(exp2_minus_16_bits));
+	EXPECT_EQ(exp2_minus_16_f32, exp2_minus_16_bits) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F16 = 0x" << std::setw(4) << exp2_minus_16_f16 << ", " <<
+		"F32(F16) = 0x" << std::setw(8) << exp2_minus_16_bits << ", " <<
+		"F32 = 0x" << std::setw(8) << exp2_minus_16_f32;
+
+	const float exp2_minus_17_value = fp16_ieee_to_fp32_value(exp2_minus_17_f16);
+	uint32_t exp2_minus_17_bits;
+	memcpy(&exp2_minus_17_bits, &exp2_minus_17_value, sizeof(exp2_minus_17_bits));
+	EXPECT_EQ(exp2_minus_17_f32, exp2_minus_17_bits) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F16 = 0x" << std::setw(4) << exp2_minus_17_f16 << ", " <<
+		"F32(F16) = 0x" << std::setw(8) << exp2_minus_17_bits << ", " <<
+		"F32 = 0x" << std::setw(8) << exp2_minus_17_f32;
+
+	const float exp2_minus_18_value = fp16_ieee_to_fp32_value(exp2_minus_18_f16);
+	uint32_t exp2_minus_18_bits;
+	memcpy(&exp2_minus_18_bits, &exp2_minus_18_value, sizeof(exp2_minus_18_bits));
+	EXPECT_EQ(exp2_minus_18_f32, exp2_minus_18_bits) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F16 = 0x" << std::setw(4) << exp2_minus_18_f16 << ", " <<
+		"F32(F16) = 0x" << std::setw(8) << exp2_minus_18_bits << ", " <<
+		"F32 = 0x" << std::setw(8) << exp2_minus_18_f32;
+
+	const float exp2_minus_19_value = fp16_ieee_to_fp32_value(exp2_minus_19_f16);
+	uint32_t exp2_minus_19_bits;
+	memcpy(&exp2_minus_19_bits, &exp2_minus_19_value, sizeof(exp2_minus_19_bits));
+	EXPECT_EQ(exp2_minus_19_f32, exp2_minus_19_bits) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F16 = 0x" << std::setw(4) << exp2_minus_19_f16 << ", " <<
+		"F32(F16) = 0x" << std::setw(8) << exp2_minus_19_bits << ", " <<
+		"F32 = 0x" << std::setw(8) << exp2_minus_19_f32;
+
+	const float exp2_minus_20_value = fp16_ieee_to_fp32_value(exp2_minus_20_f16);
+	uint32_t exp2_minus_20_bits;
+	memcpy(&exp2_minus_20_bits, &exp2_minus_20_value, sizeof(exp2_minus_20_bits));
+	EXPECT_EQ(exp2_minus_20_f32, exp2_minus_20_bits) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F16 = 0x" << std::setw(4) << exp2_minus_20_f16 << ", " <<
+		"F32(F16) = 0x" << std::setw(8) << exp2_minus_20_bits << ", " <<
+		"F32 = 0x" << std::setw(8) << exp2_minus_20_f32;
+
+	const float exp2_minus_21_value = fp16_ieee_to_fp32_value(exp2_minus_21_f16);
+	uint32_t exp2_minus_21_bits;
+	memcpy(&exp2_minus_21_bits, &exp2_minus_21_value, sizeof(exp2_minus_21_bits));
+	EXPECT_EQ(exp2_minus_21_f32, exp2_minus_21_bits) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F16 = 0x" << std::setw(4) << exp2_minus_21_f16 << ", " <<
+		"F32(F16) = 0x" << std::setw(8) << exp2_minus_21_bits << ", " <<
+		"F32 = 0x" << std::setw(8) << exp2_minus_21_f32;
+
+	const float exp2_minus_22_value = fp16_ieee_to_fp32_value(exp2_minus_22_f16);
+	uint32_t exp2_minus_22_bits;
+	memcpy(&exp2_minus_22_bits, &exp2_minus_22_value, sizeof(exp2_minus_22_bits));
+	EXPECT_EQ(exp2_minus_22_f32, exp2_minus_22_bits) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F16 = 0x" << std::setw(4) << exp2_minus_22_f16 << ", " <<
+		"F32(F16) = 0x" << std::setw(8) << exp2_minus_22_bits << ", " <<
+		"F32 = 0x" << std::setw(8) << exp2_minus_22_f32;
+
+	const float exp2_minus_23_value = fp16_ieee_to_fp32_value(exp2_minus_23_f16);
+	uint32_t exp2_minus_23_bits;
+	memcpy(&exp2_minus_23_bits, &exp2_minus_23_value, sizeof(exp2_minus_23_bits));
+	EXPECT_EQ(exp2_minus_23_f32, exp2_minus_23_bits) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F16 = 0x" << std::setw(4) << exp2_minus_23_f16 << ", " <<
+		"F32(F16) = 0x" << std::setw(8) << exp2_minus_23_bits << ", " <<
+		"F32 = 0x" << std::setw(8) << exp2_minus_23_f32;
+
+	const float exp2_minus_24_value = fp16_ieee_to_fp32_value(exp2_minus_24_f16);
+	uint32_t exp2_minus_24_bits;
+	memcpy(&exp2_minus_24_bits, &exp2_minus_24_value, sizeof(exp2_minus_24_bits));
+	EXPECT_EQ(exp2_minus_24_f32, exp2_minus_24_bits) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F16 = 0x" << std::setw(4) << exp2_minus_24_f16 << ", " <<
+		"F32(F16) = 0x" << std::setw(8) << exp2_minus_24_bits << ", " <<
+		"F32 = 0x" << std::setw(8) << exp2_minus_24_f32;
+}
+
+TEST(FP16_IEEE_TO_FP32_VALUE, zero) {
+	/* Each signed half-precision zero must yield the same-signed float zero. */
+	const uint16_t plus_zero_half = UINT16_C(0x0000);
+	const uint32_t plus_zero_expected = UINT32_C(0x00000000);
+	const uint16_t minus_zero_half = UINT16_C(0x8000);
+	const uint32_t minus_zero_expected = UINT32_C(0x80000000);
+
+	/* Convert both inputs, then copy out the raw bits of each float result. */
+	const float plus_zero_float = fp16_ieee_to_fp32_value(plus_zero_half);
+	const float minus_zero_float = fp16_ieee_to_fp32_value(minus_zero_half);
+	uint32_t plus_zero_actual, minus_zero_actual;
+	memcpy(&plus_zero_actual, &plus_zero_float, sizeof(plus_zero_actual));
+	memcpy(&minus_zero_actual, &minus_zero_float, sizeof(minus_zero_actual));
+
+	EXPECT_EQ(plus_zero_expected, plus_zero_actual) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F16 = 0x" << std::setw(4) << plus_zero_half << ", " <<
+		"F32(F16) = 0x" << std::setw(8) << plus_zero_actual << ", " <<
+		"F32 = 0x" << std::setw(8) << plus_zero_expected;
+	EXPECT_EQ(minus_zero_expected, minus_zero_actual) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F16 = 0x" << std::setw(4) << minus_zero_half << ", " <<
+		"F32(F16) = 0x" << std::setw(8) << minus_zero_actual << ", " <<
+		"F32 = 0x" << std::setw(8) << minus_zero_expected;
+}
+
+TEST(FP16_IEEE_TO_FP32_VALUE, infinity) {
+	/* Each signed half-precision infinity must yield the same-signed float infinity. */
+	const uint16_t plus_inf_half = UINT16_C(0x7C00);
+	const uint32_t plus_inf_expected = UINT32_C(0x7F800000);
+	const uint16_t minus_inf_half = UINT16_C(0xFC00);
+	const uint32_t minus_inf_expected = UINT32_C(0xFF800000);
+
+	/* Convert both inputs, then copy out the raw bits of each float result. */
+	const float plus_inf_float = fp16_ieee_to_fp32_value(plus_inf_half);
+	const float minus_inf_float = fp16_ieee_to_fp32_value(minus_inf_half);
+	uint32_t plus_inf_actual, minus_inf_actual;
+	memcpy(&plus_inf_actual, &plus_inf_float, sizeof(plus_inf_actual));
+	memcpy(&minus_inf_actual, &minus_inf_float, sizeof(minus_inf_actual));
+
+	EXPECT_EQ(plus_inf_expected, plus_inf_actual) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F16 = 0x" << std::setw(4) << plus_inf_half << ", " <<
+		"F32(F16) = 0x" << std::setw(8) << plus_inf_actual << ", " <<
+		"F32 = 0x" << std::setw(8) << plus_inf_expected;
+	EXPECT_EQ(minus_inf_expected, minus_inf_actual) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F16 = 0x" << std::setw(4) << minus_inf_half << ", " <<
+		"F32(F16) = 0x" << std::setw(8) << minus_inf_actual << ", " <<
+		"F32 = 0x" << std::setw(8) << minus_inf_expected;
+}
+
+TEST(FP16_IEEE_TO_FP32_VALUE, positive_nan) {
+	using std::signbit;
+	using std::isnan;
+	/* Sweep every non-zero mantissa under exponent 0x7C00 with the sign bit clear: each input is a positive NaN. */
+	for (uint16_t m = UINT16_C(1); m < UINT16_C(0x0400); m++) {
+		const uint16_t nan_f16 = UINT16_C(0x7C00) | m;
+		const float nan_f32 = fp16_ieee_to_fp32_value(nan_f16);
+		uint32_t nan_bits;
+		memcpy(&nan_bits, &nan_f32, sizeof(nan_bits));
+		/* Check if NaN (nan_bits is only printed in the failure message) */
+		EXPECT_TRUE(isnan(nan_f32)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << nan_f16 << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << nan_bits;
+
+		/* Check sign: signbit is a predicate, so assert it directly rather than comparing it with 0 */
+		EXPECT_FALSE(signbit(nan_f32)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << nan_f16 << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << nan_bits;
+	}
+}
+
+TEST(FP16_IEEE_TO_FP32_VALUE, negative_nan) {
+	using std::signbit;
+	using std::isnan;
+	/* Sweep every non-zero mantissa under exponent and sign bits 0xFC00: each input is a negative NaN. */
+	for (uint16_t m = UINT16_C(1); m < UINT16_C(0x0400); m++) {
+		const uint16_t nan_f16 = UINT16_C(0xFC00) | m;
+		const float nan_f32 = fp16_ieee_to_fp32_value(nan_f16);
+		uint32_t nan_bits;
+		memcpy(&nan_bits, &nan_f32, sizeof(nan_bits));
+		/* Check if NaN (nan_bits is only printed in the failure message) */
+		EXPECT_TRUE(isnan(nan_f32)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << nan_f16 << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << nan_bits;
+
+		/* Check sign: assert truthiness, since a sign test is only guaranteed non-zero, not exactly 1 */
+		EXPECT_TRUE(signbit(nan_f32)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << nan_f16 << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << nan_bits;
+	}
+}
+
+TEST(FP16_IEEE_TO_FP32_VALUE, positive_normalized_values) {
+	/* All positive normalized inputs; reference = fp16::normalizedValues[h] with e added at bit 23. */
+	const uint32_t exponentBias = 15;
+	for (int32_t e = -14; e <= 15; e++) {
+		for (uint16_t h = 0; h < 0x0400; h++) {
+			const uint16_t encoded = h + ((uint16_t) (e + exponentBias) << 10);
+			const uint32_t expected = fp16::normalizedValues[h] + ((uint32_t) e << 23);
+			const float converted = fp16_ieee_to_fp32_value(encoded);
+			uint32_t actual;
+			memcpy(&actual, &converted, sizeof(actual));
+			EXPECT_EQ(expected, actual) << std::hex << std::uppercase << std::setfill('0') <<
+				"F16 = 0x" << std::setw(4) << encoded << ", " <<
+				"F32(F16) = 0x" << std::setw(8) << actual << ", " <<
+				"F32 = 0x" << std::setw(8) << expected;
+		}
+	}
+}
+
+TEST(FP16_IEEE_TO_FP32_VALUE, negative_normalized_values) {
+	/* All negative normalized inputs; input and reference are the positive patterns with the sign bit flipped. */
+	const uint32_t exponentBias = 15;
+	for (int32_t e = -14; e <= 15; e++) {
+		for (uint16_t h = 0; h < 0x0400; h++) {
+			const uint16_t encoded = (h + ((uint16_t) (e + exponentBias) << 10)) ^ UINT16_C(0x8000);
+			const uint32_t expected = (fp16::normalizedValues[h] + ((uint32_t) e << 23)) ^ UINT32_C(0x80000000);
+			const float converted = fp16_ieee_to_fp32_value(encoded);
+			uint32_t actual;
+			memcpy(&actual, &converted, sizeof(actual));
+			EXPECT_EQ(expected, actual) << std::hex << std::uppercase << std::setfill('0') <<
+				"F16 = 0x" << std::setw(4) << encoded << ", " <<
+				"F32(F16) = 0x" << std::setw(8) << actual << ", " <<
+				"F32 = 0x" << std::setw(8) << expected;
+		}
+	}
+}
+
+TEST(FP16_IEEE_TO_FP32_VALUE, positive_denormalized_values) {
+	/* Inputs 0x0000..0x03FF (zero exponent field) are compared with fp16::denormalizedValues[h]. */
+	for (uint16_t h = 0; h < 0x0400; h++) {
+		const float converted = fp16_ieee_to_fp32_value(h);
+		uint32_t actual;
+		memcpy(&actual, &converted, sizeof(actual));
+		EXPECT_EQ(fp16::denormalizedValues[h], actual) << std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << h << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << actual << ", " <<
+			"F32 = 0x" << std::setw(8) << fp16::denormalizedValues[h];
+	}
+}
+
+TEST(FP16_IEEE_TO_FP32_VALUE, negative_denormalized_values) {
+	/* Inputs 0x8000..0x83FF; the reference is fp16::denormalizedValues[h] with the sign bit flipped. */
+	for (uint16_t h = 0; h < 0x0400; h++) {
+		const uint16_t encoded = h ^ UINT16_C(0x8000);
+		const uint32_t expected = fp16::denormalizedValues[h] ^ UINT32_C(0x80000000);
+		const float converted = fp16_ieee_to_fp32_value(encoded);
+		uint32_t actual;
+		memcpy(&actual, &converted, sizeof(actual));
+		EXPECT_EQ(expected, actual) << std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << encoded << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << actual << ", " <<
+			"F32 = 0x" << std::setw(8) << expected;
+	}
+}
diff --git a/test/ieee-to-fp32x2-psimd.cc b/test/ieee-to-fp32x2-psimd.cc
new file mode 100644
index 0000000..602e341
--- /dev/null
+++ b/test/ieee-to-fp32x2-psimd.cc
@@ -0,0 +1,541 @@
+#include <gtest/gtest.h>
+
+#include <cstdint>
+
+#include <fp16.h>
+#include <fp16/psimd.h>
+
+
+TEST(FP16_IEEE_TO_FP32x2_PSIMD, infinity) {
+	const uint16_t positive_infinity_f16 = UINT16_C(0x7C00);
+	const uint16_t negative_infinity_f16 = UINT16_C(0xFC00);
+	/* Single-precision bit patterns the conversion must produce for those inputs. */
+	const uint32_t positive_infinity_f32 = UINT32_C(0x7F800000);
+	const uint32_t negative_infinity_f32 = UINT32_C(0xFF800000);
+	/* Eight input lanes mixing both signs in varying order. */
+	const psimd_u16 fp16 = {
+		positive_infinity_f16, negative_infinity_f16,
+		negative_infinity_f16, positive_infinity_f16,
+		positive_infinity_f16, positive_infinity_f16,
+		negative_infinity_f16, negative_infinity_f16
+	};
+	const psimd_u32x2 fp32 =
+		psimd_cast_f32x2_u32x2(fp16_ieee_to_fp32x2_psimd(fp16));
+	/* The checks below pair fp32.lo[0..3] with input lanes 0..3 and fp32.hi[0..3] with input lanes 4..7. */
+	EXPECT_EQ(positive_infinity_f32, fp32.lo[0]) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F16 = 0x" << std::setw(4) << fp16[0] << ", " <<
+		"F32(F16) = 0x" << std::setw(8) << fp32.lo[0] << ", " <<
+		"F32 = 0x" << std::setw(8) << positive_infinity_f32;
+	EXPECT_EQ(negative_infinity_f32, fp32.lo[1]) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F16 = 0x" << std::setw(4) << fp16[1] << ", " <<
+		"F32(F16) = 0x" << std::setw(8) << fp32.lo[1] << ", " <<
+		"F32 = 0x" << std::setw(8) << negative_infinity_f32;
+	EXPECT_EQ(negative_infinity_f32, fp32.lo[2]) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F16 = 0x" << std::setw(4) << fp16[2] << ", " <<
+		"F32(F16) = 0x" << std::setw(8) << fp32.lo[2] << ", " <<
+		"F32 = 0x" << std::setw(8) << negative_infinity_f32;
+	EXPECT_EQ(positive_infinity_f32, fp32.lo[3]) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F16 = 0x" << std::setw(4) << fp16[3] << ", " <<
+		"F32(F16) = 0x" << std::setw(8) << fp32.lo[3] << ", " <<
+		"F32 = 0x" << std::setw(8) << positive_infinity_f32;
+	EXPECT_EQ(positive_infinity_f32, fp32.hi[0]) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F16 = 0x" << std::setw(4) << fp16[4] << ", " <<
+		"F32(F16) = 0x" << std::setw(8) << fp32.hi[0] << ", " <<
+		"F32 = 0x" << std::setw(8) << positive_infinity_f32;
+	EXPECT_EQ(positive_infinity_f32, fp32.hi[1]) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F16 = 0x" << std::setw(4) << fp16[5] << ", " <<
+		"F32(F16) = 0x" << std::setw(8) << fp32.hi[1] << ", " <<
+		"F32 = 0x" << std::setw(8) << positive_infinity_f32;
+	EXPECT_EQ(negative_infinity_f32, fp32.hi[2]) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F16 = 0x" << std::setw(4) << fp16[6] << ", " <<
+		"F32(F16) = 0x" << std::setw(8) << fp32.hi[2] << ", " <<
+		"F32 = 0x" << std::setw(8) << negative_infinity_f32;
+	EXPECT_EQ(negative_infinity_f32, fp32.hi[3]) <<
+		std::hex << std::uppercase << std::setfill('0') <<
+		"F16 = 0x" << std::setw(4) << fp16[7] << ", " <<
+		"F32(F16) = 0x" << std::setw(8) << fp32.hi[3] << ", " <<
+		"F32 = 0x" << std::setw(8) << negative_infinity_f32;
+}
+
+TEST(FP16_IEEE_TO_FP32x2_PSIMD, positive_nan) {
+	for (uint16_t h = 0; h < 0x0400; h += 8) {
+		const psimd_u16 fp16 = {
+			(uint16_t) (h + 0x7C00 + (h == 0)) /* Avoid infinity */,
+			(uint16_t) (h + 0x7C01),
+			(uint16_t) (h + 0x7C02),
+			(uint16_t) (h + 0x7C03),
+			(uint16_t) (h + 0x7C04),
+			(uint16_t) (h + 0x7C05),
+			(uint16_t) (h + 0x7C06),
+			(uint16_t) (h + 0x7C07),
+		};
+		const psimd_u32x2 fp32 =
+			psimd_cast_f32x2_u32x2(fp16_ieee_to_fp32x2_psimd(fp16));
+		/* Every lane holds a positive NaN; each result must keep a clear sign, an all-ones exponent and a non-zero mantissa. */
+		/* Check sign: bit 31 must be clear (compared with an unsigned zero so both operands are uint32_t) */
+		EXPECT_EQ(fp32.lo[0] & UINT32_C(0x80000000), UINT32_C(0)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[0] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32.lo[0];
+		EXPECT_EQ(fp32.lo[1] & UINT32_C(0x80000000), UINT32_C(0)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[1] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32.lo[1];
+		EXPECT_EQ(fp32.lo[2] & UINT32_C(0x80000000), UINT32_C(0)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[2] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32.lo[2];
+		EXPECT_EQ(fp32.lo[3] & UINT32_C(0x80000000), UINT32_C(0)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[3] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32.lo[3];
+		EXPECT_EQ(fp32.hi[0] & UINT32_C(0x80000000), UINT32_C(0)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[4] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32.hi[0];
+		EXPECT_EQ(fp32.hi[1] & UINT32_C(0x80000000), UINT32_C(0)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[5] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32.hi[1];
+		EXPECT_EQ(fp32.hi[2] & UINT32_C(0x80000000), UINT32_C(0)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[6] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32.hi[2];
+		EXPECT_EQ(fp32.hi[3] & UINT32_C(0x80000000), UINT32_C(0)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[7] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32.hi[3];
+
+		/* Check exponent: all eight exponent bits (mask 0x7F800000) must be set */
+		EXPECT_EQ(fp32.lo[0] & UINT32_C(0x7F800000), UINT32_C(0x7F800000)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[0] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32.lo[0];
+		EXPECT_EQ(fp32.lo[1] & UINT32_C(0x7F800000), UINT32_C(0x7F800000)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[1] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32.lo[1];
+		EXPECT_EQ(fp32.lo[2] & UINT32_C(0x7F800000), UINT32_C(0x7F800000)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[2] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32.lo[2];
+		EXPECT_EQ(fp32.lo[3] & UINT32_C(0x7F800000), UINT32_C(0x7F800000)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[3] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32.lo[3];
+		EXPECT_EQ(fp32.hi[0] & UINT32_C(0x7F800000), UINT32_C(0x7F800000)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[4] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32.hi[0];
+		EXPECT_EQ(fp32.hi[1] & UINT32_C(0x7F800000), UINT32_C(0x7F800000)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[5] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32.hi[1];
+		EXPECT_EQ(fp32.hi[2] & UINT32_C(0x7F800000), UINT32_C(0x7F800000)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[6] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32.hi[2];
+		EXPECT_EQ(fp32.hi[3] & UINT32_C(0x7F800000), UINT32_C(0x7F800000)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[7] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32.hi[3];
+
+		/* Check mantissa: at least one of the low 23 bits must be set, otherwise the value would be infinity */
+		EXPECT_NE(fp32.lo[0] & UINT32_C(0x007FFFFF), UINT32_C(0)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[0] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32.lo[0];
+		EXPECT_NE(fp32.lo[1] & UINT32_C(0x007FFFFF), UINT32_C(0)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[1] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32.lo[1];
+		EXPECT_NE(fp32.lo[2] & UINT32_C(0x007FFFFF), UINT32_C(0)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[2] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32.lo[2];
+		EXPECT_NE(fp32.lo[3] & UINT32_C(0x007FFFFF), UINT32_C(0)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[3] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32.lo[3];
+		EXPECT_NE(fp32.hi[0] & UINT32_C(0x007FFFFF), UINT32_C(0)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[4] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32.hi[0];
+		EXPECT_NE(fp32.hi[1] & UINT32_C(0x007FFFFF), UINT32_C(0)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[5] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32.hi[1];
+		EXPECT_NE(fp32.hi[2] & UINT32_C(0x007FFFFF), UINT32_C(0)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[6] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32.hi[2];
+		EXPECT_NE(fp32.hi[3] & UINT32_C(0x007FFFFF), UINT32_C(0)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[7] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32.hi[3];
+	}
+}
+
+TEST(FP16_IEEE_TO_FP32x2_PSIMD, negative_nan) {
+	for (uint16_t h = 0; h < 0x0400; h += 8) { /* covers inputs 0xFC01..0xFFFF, 8 lanes per vector */
+		const psimd_u16 fp16 = {
+			(uint16_t) (h + 0xFC00 + (h == 0)) /* Avoid infinity: lane 0 becomes 0xFC01 instead of 0xFC00 when h == 0 */,
+			(uint16_t) (h + 0xFC01),
+			(uint16_t) (h + 0xFC02),
+			(uint16_t) (h + 0xFC03),
+			(uint16_t) (h + 0xFC04),
+			(uint16_t) (h + 0xFC05),
+			(uint16_t) (h + 0xFC06),
+			(uint16_t) (h + 0xFC07),
+		};
+		const psimd_u32x2 fp32 =
+			psimd_cast_f32x2_u32x2(fp16_ieee_to_fp32x2_psimd(fp16));
+
+		/* Check sign: bit 31 of every converted lane must be set */
+		EXPECT_EQ(fp32.lo[0] & UINT32_C(0x80000000), UINT32_C(0x80000000)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[0] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32.lo[0];
+		EXPECT_EQ(fp32.lo[1] & UINT32_C(0x80000000), UINT32_C(0x80000000)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[1] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32.lo[1];
+		EXPECT_EQ(fp32.lo[2] & UINT32_C(0x80000000), UINT32_C(0x80000000)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[2] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32.lo[2];
+		EXPECT_EQ(fp32.lo[3] & UINT32_C(0x80000000), UINT32_C(0x80000000)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[3] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32.lo[3];
+		EXPECT_EQ(fp32.hi[0] & UINT32_C(0x80000000), UINT32_C(0x80000000)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[4] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32.hi[0];
+		EXPECT_EQ(fp32.hi[1] & UINT32_C(0x80000000), UINT32_C(0x80000000)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[5] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32.hi[1];
+		EXPECT_EQ(fp32.hi[2] & UINT32_C(0x80000000), UINT32_C(0x80000000)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[6] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32.hi[2];
+		EXPECT_EQ(fp32.hi[3] & UINT32_C(0x80000000), UINT32_C(0x80000000)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[7] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32.hi[3];
+
+		/* Check exponent: all bits under mask 0x7F800000 must be ones in every lane */
+		EXPECT_EQ(fp32.lo[0] & UINT32_C(0x7F800000), UINT32_C(0x7F800000)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[0] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32.lo[0];
+		EXPECT_EQ(fp32.lo[1] & UINT32_C(0x7F800000), UINT32_C(0x7F800000)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[1] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32.lo[1];
+		EXPECT_EQ(fp32.lo[2] & UINT32_C(0x7F800000), UINT32_C(0x7F800000)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[2] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32.lo[2];
+		EXPECT_EQ(fp32.lo[3] & UINT32_C(0x7F800000), UINT32_C(0x7F800000)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[3] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32.lo[3];
+		EXPECT_EQ(fp32.hi[0] & UINT32_C(0x7F800000), UINT32_C(0x7F800000)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[4] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32.hi[0];
+		EXPECT_EQ(fp32.hi[1] & UINT32_C(0x7F800000), UINT32_C(0x7F800000)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[5] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32.hi[1];
+		EXPECT_EQ(fp32.hi[2] & UINT32_C(0x7F800000), UINT32_C(0x7F800000)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[6] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32.hi[2];
+		EXPECT_EQ(fp32.hi[3] & UINT32_C(0x7F800000), UINT32_C(0x7F800000)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[7] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32.hi[3];
+
+		/* Check mantissa: the low 23 bits must be non-zero in every lane (zero would mean infinity, not NaN) */
+		EXPECT_NE(fp32.lo[0] & UINT32_C(0x007FFFFF), UINT32_C(0)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[0] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32.lo[0];
+		EXPECT_NE(fp32.lo[1] & UINT32_C(0x007FFFFF), UINT32_C(0)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[1] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32.lo[1];
+		EXPECT_NE(fp32.lo[2] & UINT32_C(0x007FFFFF), UINT32_C(0)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[2] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32.lo[2];
+		EXPECT_NE(fp32.lo[3] & UINT32_C(0x007FFFFF), UINT32_C(0)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[3] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32.lo[3];
+		EXPECT_NE(fp32.hi[0] & UINT32_C(0x007FFFFF), UINT32_C(0)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[4] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32.hi[0];
+		EXPECT_NE(fp32.hi[1] & UINT32_C(0x007FFFFF), UINT32_C(0)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[5] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32.hi[1];
+		EXPECT_NE(fp32.hi[2] & UINT32_C(0x007FFFFF), UINT32_C(0)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[6] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32.hi[2];
+		EXPECT_NE(fp32.hi[3] & UINT32_C(0x007FFFFF), UINT32_C(0)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[7] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32.hi[3];
+	}
+}
+
+TEST(FP16_IEEE_TO_FP32x2_PSIMD, positive_normalized_values) {
+	const uint32_t exponentBias = 15;
+	for (int32_t e = -14; e <= 15; e++) { /* e + exponentBias spans the biased exponent field values 1..30 */
+		for (uint16_t h = 0; h < 0x0400; h += 8) { /* h is the mantissa of lane 0; lanes 1..7 add 1..7 */
+			const psimd_u16 fp16 = {
+				(uint16_t) (h + ((e + exponentBias) << 10) + 0),
+				(uint16_t) (h + ((e + exponentBias) << 10) + 1),
+				(uint16_t) (h + ((e + exponentBias) << 10) + 2),
+				(uint16_t) (h + ((e + exponentBias) << 10) + 3),
+				(uint16_t) (h + ((e + exponentBias) << 10) + 4),
+				(uint16_t) (h + ((e + exponentBias) << 10) + 5),
+				(uint16_t) (h + ((e + exponentBias) << 10) + 6),
+				(uint16_t) (h + ((e + exponentBias) << 10) + 7)
+			};
+			const psimd_u32x2 fp32 =
+				psimd_cast_f32x2_u32x2(fp16_ieee_to_fp32x2_psimd(fp16));
+			/* Lanes 0..3 come back in fp32.lo, lanes 4..7 in fp32.hi; each must equal fp16_ieee_to_fp32_bits() of its input. */
+			EXPECT_EQ(fp16_ieee_to_fp32_bits(fp16[0]), fp32.lo[0]) <<
+				std::hex << std::uppercase << std::setfill('0') <<
+				"F16 = 0x" << std::setw(4) << fp16[0] << ", " <<
+				"F32(F16) = 0x" << std::setw(8) << fp32.lo[0] << ", " <<
+				"F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(fp16[0]);
+			EXPECT_EQ(fp16_ieee_to_fp32_bits(fp16[1]), fp32.lo[1]) <<
+				std::hex << std::uppercase << std::setfill('0') <<
+				"F16 = 0x" << std::setw(4) << fp16[1] << ", " <<
+				"F32(F16) = 0x" << std::setw(8) << fp32.lo[1] << ", " <<
+				"F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(fp16[1]);
+			EXPECT_EQ(fp16_ieee_to_fp32_bits(fp16[2]), fp32.lo[2]) <<
+				std::hex << std::uppercase << std::setfill('0') <<
+				"F16 = 0x" << std::setw(4) << fp16[2] << ", " <<
+				"F32(F16) = 0x" << std::setw(8) << fp32.lo[2] << ", " <<
+				"F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(fp16[2]);
+			EXPECT_EQ(fp16_ieee_to_fp32_bits(fp16[3]), fp32.lo[3]) <<
+				std::hex << std::uppercase << std::setfill('0') <<
+				"F16 = 0x" << std::setw(4) << fp16[3] << ", " <<
+				"F32(F16) = 0x" << std::setw(8) << fp32.lo[3] << ", " <<
+				"F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(fp16[3]);
+			EXPECT_EQ(fp16_ieee_to_fp32_bits(fp16[4]), fp32.hi[0]) <<
+				std::hex << std::uppercase << std::setfill('0') <<
+				"F16 = 0x" << std::setw(4) << fp16[4] << ", " <<
+				"F32(F16) = 0x" << std::setw(8) << fp32.hi[0] << ", " <<
+				"F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(fp16[4]);
+			EXPECT_EQ(fp16_ieee_to_fp32_bits(fp16[5]), fp32.hi[1]) <<
+				std::hex << std::uppercase << std::setfill('0') <<
+				"F16 = 0x" << std::setw(4) << fp16[5] << ", " <<
+				"F32(F16) = 0x" << std::setw(8) << fp32.hi[1] << ", " <<
+				"F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(fp16[5]);
+			EXPECT_EQ(fp16_ieee_to_fp32_bits(fp16[6]), fp32.hi[2]) <<
+				std::hex << std::uppercase << std::setfill('0') <<
+				"F16 = 0x" << std::setw(4) << fp16[6] << ", " <<
+				"F32(F16) = 0x" << std::setw(8) << fp32.hi[2] << ", " <<
+				"F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(fp16[6]);
+			EXPECT_EQ(fp16_ieee_to_fp32_bits(fp16[7]), fp32.hi[3]) <<
+				std::hex << std::uppercase << std::setfill('0') <<
+				"F16 = 0x" << std::setw(4) << fp16[7] << ", " <<
+				"F32(F16) = 0x" << std::setw(8) << fp32.hi[3] << ", " <<
+				"F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(fp16[7]);
+		}
+	}
+}
+
+TEST(FP16_IEEE_TO_FP32x2_PSIMD, negative_normalized_values) {
+	const uint32_t exponentBias = 15;
+	for (int32_t e = -14; e <= 15; e++) { /* e + exponentBias spans the biased exponent field values 1..30 */
+		for (uint16_t h = 0; h < 0x0400; h += 8) { /* h is the mantissa of lane 0; 0x8000 sets the top (sign) bit */
+			const psimd_u16 fp16 = {
+				(uint16_t) (h + ((e + exponentBias) << 10) + 0x8000),
+				(uint16_t) (h + ((e + exponentBias) << 10) + 0x8001),
+				(uint16_t) (h + ((e + exponentBias) << 10) + 0x8002),
+				(uint16_t) (h + ((e + exponentBias) << 10) + 0x8003),
+				(uint16_t) (h + ((e + exponentBias) << 10) + 0x8004),
+				(uint16_t) (h + ((e + exponentBias) << 10) + 0x8005),
+				(uint16_t) (h + ((e + exponentBias) << 10) + 0x8006),
+				(uint16_t) (h + ((e + exponentBias) << 10) + 0x8007)
+			};
+			const psimd_u32x2 fp32 =
+				psimd_cast_f32x2_u32x2(fp16_ieee_to_fp32x2_psimd(fp16));
+			/* Lanes 0..3 come back in fp32.lo, lanes 4..7 in fp32.hi; each must equal fp16_ieee_to_fp32_bits() of its input. */
+			EXPECT_EQ(fp16_ieee_to_fp32_bits(fp16[0]), fp32.lo[0]) <<
+				std::hex << std::uppercase << std::setfill('0') <<
+				"F16 = 0x" << std::setw(4) << fp16[0] << ", " <<
+				"F32(F16) = 0x" << std::setw(8) << fp32.lo[0] << ", " <<
+				"F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(fp16[0]);
+			EXPECT_EQ(fp16_ieee_to_fp32_bits(fp16[1]), fp32.lo[1]) <<
+				std::hex << std::uppercase << std::setfill('0') <<
+				"F16 = 0x" << std::setw(4) << fp16[1] << ", " <<
+				"F32(F16) = 0x" << std::setw(8) << fp32.lo[1] << ", " <<
+				"F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(fp16[1]);
+			EXPECT_EQ(fp16_ieee_to_fp32_bits(fp16[2]), fp32.lo[2]) <<
+				std::hex << std::uppercase << std::setfill('0') <<
+				"F16 = 0x" << std::setw(4) << fp16[2] << ", " <<
+				"F32(F16) = 0x" << std::setw(8) << fp32.lo[2] << ", " <<
+				"F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(fp16[2]);
+			EXPECT_EQ(fp16_ieee_to_fp32_bits(fp16[3]), fp32.lo[3]) <<
+				std::hex << std::uppercase << std::setfill('0') <<
+				"F16 = 0x" << std::setw(4) << fp16[3] << ", " <<
+				"F32(F16) = 0x" << std::setw(8) << fp32.lo[3] << ", " <<
+				"F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(fp16[3]);
+			EXPECT_EQ(fp16_ieee_to_fp32_bits(fp16[4]), fp32.hi[0]) <<
+				std::hex << std::uppercase << std::setfill('0') <<
+				"F16 = 0x" << std::setw(4) << fp16[4] << ", " <<
+				"F32(F16) = 0x" << std::setw(8) << fp32.hi[0] << ", " <<
+				"F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(fp16[4]);
+			EXPECT_EQ(fp16_ieee_to_fp32_bits(fp16[5]), fp32.hi[1]) <<
+				std::hex << std::uppercase << std::setfill('0') <<
+				"F16 = 0x" << std::setw(4) << fp16[5] << ", " <<
+				"F32(F16) = 0x" << std::setw(8) << fp32.hi[1] << ", " <<
+				"F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(fp16[5]);
+			EXPECT_EQ(fp16_ieee_to_fp32_bits(fp16[6]), fp32.hi[2]) <<
+				std::hex << std::uppercase << std::setfill('0') <<
+				"F16 = 0x" << std::setw(4) << fp16[6] << ", " <<
+				"F32(F16) = 0x" << std::setw(8) << fp32.hi[2] << ", " <<
+				"F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(fp16[6]);
+			EXPECT_EQ(fp16_ieee_to_fp32_bits(fp16[7]), fp32.hi[3]) <<
+				std::hex << std::uppercase << std::setfill('0') <<
+				"F16 = 0x" << std::setw(4) << fp16[7] << ", " <<
+				"F32(F16) = 0x" << std::setw(8) << fp32.hi[3] << ", " <<
+				"F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(fp16[7]);
+		}
+	}
+}
+
+TEST(FP16_IEEE_TO_FP32x2_PSIMD, positive_denormalized_values) {
+	for (uint16_t h = 0; h < 0x0400; h += 8) { /* inputs 0x0000..0x03FF: exponent field zero, 8 lanes per vector */
+		const psimd_u16 fp16 = {
+			(uint16_t) (h + 0),
+			(uint16_t) (h + 1),
+			(uint16_t) (h + 2),
+			(uint16_t) (h + 3),
+			(uint16_t) (h + 4),
+			(uint16_t) (h + 5),
+			(uint16_t) (h + 6),
+			(uint16_t) (h + 7)
+		};
+		const psimd_u32x2 fp32 =
+			psimd_cast_f32x2_u32x2(fp16_ieee_to_fp32x2_psimd(fp16));
+		/* Lanes 0..3 come back in fp32.lo, lanes 4..7 in fp32.hi; each must equal fp16_ieee_to_fp32_bits() of its input. */
+		EXPECT_EQ(fp16_ieee_to_fp32_bits(fp16[0]), fp32.lo[0]) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[0] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32.lo[0] << ", " <<
+			"F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(fp16[0]);
+		EXPECT_EQ(fp16_ieee_to_fp32_bits(fp16[1]), fp32.lo[1]) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[1] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32.lo[1] << ", " <<
+			"F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(fp16[1]);
+		EXPECT_EQ(fp16_ieee_to_fp32_bits(fp16[2]), fp32.lo[2]) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[2] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32.lo[2] << ", " <<
+			"F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(fp16[2]);
+		EXPECT_EQ(fp16_ieee_to_fp32_bits(fp16[3]), fp32.lo[3]) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[3] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32.lo[3] << ", " <<
+			"F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(fp16[3]);
+		EXPECT_EQ(fp16_ieee_to_fp32_bits(fp16[4]), fp32.hi[0]) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[4] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32.hi[0] << ", " <<
+			"F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(fp16[4]);
+		EXPECT_EQ(fp16_ieee_to_fp32_bits(fp16[5]), fp32.hi[1]) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[5] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32.hi[1] << ", " <<
+			"F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(fp16[5]);
+		EXPECT_EQ(fp16_ieee_to_fp32_bits(fp16[6]), fp32.hi[2]) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[6] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32.hi[2] << ", " <<
+			"F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(fp16[6]);
+		EXPECT_EQ(fp16_ieee_to_fp32_bits(fp16[7]), fp32.hi[3]) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[7] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32.hi[3] << ", " <<
+			"F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(fp16[7]);
+	}
+}
+
+TEST(FP16_IEEE_TO_FP32x2_PSIMD, negative_denormalized_values) {
+	for (uint16_t h = 0; h < 0x0400; h += 8) { /* inputs 0x8000..0x83FF: top bit set, exponent field zero */
+		const psimd_u16 fp16 = {
+			(uint16_t) (h + 0x8000),
+			(uint16_t) (h + 0x8001),
+			(uint16_t) (h + 0x8002),
+			(uint16_t) (h + 0x8003),
+			(uint16_t) (h + 0x8004),
+			(uint16_t) (h + 0x8005),
+			(uint16_t) (h + 0x8006),
+			(uint16_t) (h + 0x8007)
+		};
+		const psimd_u32x2 fp32 =
+			psimd_cast_f32x2_u32x2(fp16_ieee_to_fp32x2_psimd(fp16));
+		/* Lanes 0..3 come back in fp32.lo, lanes 4..7 in fp32.hi; each must equal fp16_ieee_to_fp32_bits() of its input. */
+		EXPECT_EQ(fp16_ieee_to_fp32_bits(fp16[0]), fp32.lo[0]) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[0] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32.lo[0] << ", " <<
+			"F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(fp16[0]);
+		EXPECT_EQ(fp16_ieee_to_fp32_bits(fp16[1]), fp32.lo[1]) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[1] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32.lo[1] << ", " <<
+			"F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(fp16[1]);
+		EXPECT_EQ(fp16_ieee_to_fp32_bits(fp16[2]), fp32.lo[2]) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[2] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32.lo[2] << ", " <<
+			"F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(fp16[2]);
+		EXPECT_EQ(fp16_ieee_to_fp32_bits(fp16[3]), fp32.lo[3]) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[3] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32.lo[3] << ", " <<
+			"F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(fp16[3]);
+		EXPECT_EQ(fp16_ieee_to_fp32_bits(fp16[4]), fp32.hi[0]) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[4] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32.hi[0] << ", " <<
+			"F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(fp16[4]);
+		EXPECT_EQ(fp16_ieee_to_fp32_bits(fp16[5]), fp32.hi[1]) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[5] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32.hi[1] << ", " <<
+			"F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(fp16[5]);
+		EXPECT_EQ(fp16_ieee_to_fp32_bits(fp16[6]), fp32.hi[2]) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[6] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32.hi[2] << ", " <<
+			"F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(fp16[6]);
+		EXPECT_EQ(fp16_ieee_to_fp32_bits(fp16[7]), fp32.hi[3]) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"F16 = 0x" << std::setw(4) << fp16[7] << ", " <<
+			"F32(F16) = 0x" << std::setw(8) << fp32.hi[3] << ", " <<
+			"F32 = 0x" << std::setw(8) << fp16_ieee_to_fp32_bits(fp16[7]);
+	}
+}
diff --git a/test/peachpy/alt-xmm-to-fp32-xmm-avx.cc b/test/peachpy/alt-xmm-to-fp32-xmm-avx.cc
new file mode 100644
index 0000000..ec9ee02
--- /dev/null
+++ b/test/peachpy/alt-xmm-to-fp32-xmm-avx.cc
@@ -0,0 +1,96 @@
+#include <gtest/gtest.h>
+
+#include <cstdint>
+
+#include <fp16.h>
+
+/* Kernel under test (C linkage, defined outside this file) and the number of elements the tests pass to it per call. */
+extern "C" void fp16_alt_xmm_to_fp32_xmm_peachpy__avx(const uint16_t* fp16, uint32_t* fp32);
+const size_t vector_elements = 4;
+
+
+TEST(FP16_ALT_XMM_TO_FP32_XMM, positive_normalized_values) {
+	/* Biased exponents 1..31 (unbiased -14..16) times all 1024 mantissas, one vector per call. */
+	for (uint32_t biased = 1; biased <= 31; biased++) {
+		for (uint16_t mantissa = 0; mantissa < 0x0400; mantissa += vector_elements) {
+			uint16_t fp16[vector_elements];
+			uint32_t fp32[vector_elements];
+			for (size_t lane = 0; lane < vector_elements; lane++) {
+				fp16[lane] = (biased << 10) + mantissa + lane;
+			}
+			fp16_alt_xmm_to_fp32_xmm_peachpy__avx(fp16, fp32);
+			/* Every lane must agree bit-for-bit with the scalar reference conversion. */
+			for (size_t i = 0; i < vector_elements; i++) {
+				EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[i]), fp32[i]) <<
+					std::hex << std::uppercase << std::setfill('0') <<
+					"F16 = 0x" << std::setw(4) << fp16[i] << ", " <<
+					"F32(F16) = 0x" << std::setw(8) << fp32[i] << ", " <<
+					"F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[i]) <<
+					", lane " << i << "/" << vector_elements;
+			}
+		}
+	}
+}
+
+TEST(FP16_ALT_XMM_TO_FP32_XMM, negative_normalized_values) {
+	/* Same sweep as the positive case (biased exponents 1..31, all mantissas) with bit 15 set. */
+	for (uint32_t biased = 1; biased <= 31; biased++) {
+		for (uint16_t mantissa = 0; mantissa < 0x0400; mantissa += vector_elements) {
+			uint16_t fp16[vector_elements];
+			uint32_t fp32[vector_elements];
+			for (size_t lane = 0; lane < vector_elements; lane++) {
+				fp16[lane] = 0x8000 + (biased << 10) + mantissa + lane;
+			}
+			fp16_alt_xmm_to_fp32_xmm_peachpy__avx(fp16, fp32);
+			/* Every lane must agree bit-for-bit with the scalar reference conversion. */
+			for (size_t i = 0; i < vector_elements; i++) {
+				EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[i]), fp32[i]) <<
+					std::hex << std::uppercase << std::setfill('0') <<
+					"F16 = 0x" << std::setw(4) << fp16[i] << ", " <<
+					"F32(F16) = 0x" << std::setw(8) << fp32[i] << ", " <<
+					"F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[i]) <<
+					", lane " << i << "/" << vector_elements;
+			}
+		}
+	}
+}
+
+TEST(FP16_ALT_XMM_TO_FP32_XMM, positive_denormalized_values) {
+	/* Exponent field zero: inputs 0x0000..0x03FF, one vector per call, checked against the scalar reference. */
+	for (uint16_t mantissa = 0; mantissa < 0x0400; mantissa += vector_elements) {
+		uint16_t fp16[vector_elements];
+		uint32_t fp32[vector_elements];
+		for (size_t lane = 0; lane < vector_elements; lane++) {
+			fp16[lane] = mantissa + lane;
+		}
+		fp16_alt_xmm_to_fp32_xmm_peachpy__avx(fp16, fp32);
+		for (size_t i = 0; i < vector_elements; i++) {
+			EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[i]), fp32[i]) <<
+				std::hex << std::uppercase << std::setfill('0') <<
+				"F16 = 0x" << std::setw(4) << fp16[i] << ", " <<
+				"F32(F16) = 0x" << std::setw(8) << fp32[i] << ", " <<
+				"F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[i]) <<
+				", lane " << i << "/" << vector_elements;
+		}
+	}
+}
+
+TEST(FP16_ALT_XMM_TO_FP32_XMM, negative_denormalized_values) {
+	/* Bit 15 set, exponent field zero: inputs 0x8000..0x83FF, checked against the scalar reference. */
+	for (uint16_t mantissa = 0; mantissa < 0x0400; mantissa += vector_elements) {
+		uint16_t fp16[vector_elements];
+		uint32_t fp32[vector_elements];
+		for (size_t lane = 0; lane < vector_elements; lane++) {
+			fp16[lane] = 0x8000 + mantissa + lane;
+		}
+		fp16_alt_xmm_to_fp32_xmm_peachpy__avx(fp16, fp32);
+		for (size_t i = 0; i < vector_elements; i++) {
+			EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[i]), fp32[i]) <<
+				std::hex << std::uppercase << std::setfill('0') <<
+				"F16 = 0x" << std::setw(4) << fp16[i] << ", " <<
+				"F32(F16) = 0x" << std::setw(8) << fp32[i] << ", " <<
+				"F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[i]) <<
+				", lane " << i << "/" << vector_elements;
+		}
+	}
+}
diff --git a/test/peachpy/alt-xmm-to-fp32-ymm-avx2.cc b/test/peachpy/alt-xmm-to-fp32-ymm-avx2.cc
new file mode 100644
index 0000000..b108cb5
--- /dev/null
+++ b/test/peachpy/alt-xmm-to-fp32-ymm-avx2.cc
@@ -0,0 +1,96 @@
+#include <gtest/gtest.h>
+
+#include <cstdint>
+
+#include <fp16.h>
+
+/* Kernel under test (C linkage, defined outside this file) and the number of elements the tests pass to it per call. */
+extern "C" void fp16_alt_xmm_to_fp32_ymm_peachpy__avx2(const uint16_t* fp16, uint32_t* fp32);
+const size_t vector_elements = 8;
+
+
+TEST(FP16_ALT_XMM_TO_FP32_YMM, positive_normalized_values) {
+	/* Biased exponents 1..31 (unbiased -14..16) times all 1024 mantissas, one vector per call. */
+	for (uint32_t biased = 1; biased <= 31; biased++) {
+		for (uint16_t mantissa = 0; mantissa < 0x0400; mantissa += vector_elements) {
+			uint16_t fp16[vector_elements];
+			uint32_t fp32[vector_elements];
+			for (size_t lane = 0; lane < vector_elements; lane++) {
+				fp16[lane] = (biased << 10) + mantissa + lane;
+			}
+			fp16_alt_xmm_to_fp32_ymm_peachpy__avx2(fp16, fp32);
+			/* Every lane must agree bit-for-bit with the scalar reference conversion. */
+			for (size_t i = 0; i < vector_elements; i++) {
+				EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[i]), fp32[i]) <<
+					std::hex << std::uppercase << std::setfill('0') <<
+					"F16 = 0x" << std::setw(4) << fp16[i] << ", " <<
+					"F32(F16) = 0x" << std::setw(8) << fp32[i] << ", " <<
+					"F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[i]) <<
+					", lane " << i << "/" << vector_elements;
+			}
+		}
+	}
+}
+
+TEST(FP16_ALT_XMM_TO_FP32_YMM, negative_normalized_values) {
+	/* Same sweep as the positive case (biased exponents 1..31, all mantissas) with bit 15 set. */
+	for (uint32_t biased = 1; biased <= 31; biased++) {
+		for (uint16_t mantissa = 0; mantissa < 0x0400; mantissa += vector_elements) {
+			uint16_t fp16[vector_elements];
+			uint32_t fp32[vector_elements];
+			for (size_t lane = 0; lane < vector_elements; lane++) {
+				fp16[lane] = 0x8000 + (biased << 10) + mantissa + lane;
+			}
+			fp16_alt_xmm_to_fp32_ymm_peachpy__avx2(fp16, fp32);
+			/* Every lane must agree bit-for-bit with the scalar reference conversion. */
+			for (size_t i = 0; i < vector_elements; i++) {
+				EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[i]), fp32[i]) <<
+					std::hex << std::uppercase << std::setfill('0') <<
+					"F16 = 0x" << std::setw(4) << fp16[i] << ", " <<
+					"F32(F16) = 0x" << std::setw(8) << fp32[i] << ", " <<
+					"F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[i]) <<
+					", lane " << i << "/" << vector_elements;
+			}
+		}
+	}
+}
+
+TEST(FP16_ALT_XMM_TO_FP32_YMM, positive_denormalized_values) {
+	/* Exponent field zero: inputs 0x0000..0x03FF, one vector per call, checked against the scalar reference. */
+	for (uint16_t mantissa = 0; mantissa < 0x0400; mantissa += vector_elements) {
+		uint16_t fp16[vector_elements];
+		uint32_t fp32[vector_elements];
+		for (size_t lane = 0; lane < vector_elements; lane++) {
+			fp16[lane] = mantissa + lane;
+		}
+		fp16_alt_xmm_to_fp32_ymm_peachpy__avx2(fp16, fp32);
+		for (size_t i = 0; i < vector_elements; i++) {
+			EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[i]), fp32[i]) <<
+				std::hex << std::uppercase << std::setfill('0') <<
+				"F16 = 0x" << std::setw(4) << fp16[i] << ", " <<
+				"F32(F16) = 0x" << std::setw(8) << fp32[i] << ", " <<
+				"F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[i]) <<
+				", lane " << i << "/" << vector_elements;
+		}
+	}
+}
+
+TEST(FP16_ALT_XMM_TO_FP32_YMM, negative_denormalized_values) {
+	/* Bit 15 set, exponent field zero: inputs 0x8000..0x83FF, checked against the scalar reference. */
+	for (uint16_t mantissa = 0; mantissa < 0x0400; mantissa += vector_elements) {
+		uint16_t fp16[vector_elements];
+		uint32_t fp32[vector_elements];
+		for (size_t lane = 0; lane < vector_elements; lane++) {
+			fp16[lane] = 0x8000 + mantissa + lane;
+		}
+		fp16_alt_xmm_to_fp32_ymm_peachpy__avx2(fp16, fp32);
+		for (size_t i = 0; i < vector_elements; i++) {
+			EXPECT_EQ(fp16_alt_to_fp32_bits(fp16[i]), fp32[i]) <<
+				std::hex << std::uppercase << std::setfill('0') <<
+				"F16 = 0x" << std::setw(4) << fp16[i] << ", " <<
+				"F32(F16) = 0x" << std::setw(8) << fp32[i] << ", " <<
+				"F32 = 0x" << std::setw(8) << fp16_alt_to_fp32_bits(fp16[i]) <<
+				", lane " << i << "/" << vector_elements;
+		}
+	}
+}
diff --git a/test/peachpy/stubs.py b/test/peachpy/stubs.py
new file mode 100644
index 0000000..becd2eb
--- /dev/null
+++ b/test/peachpy/stubs.py
@@ -0,0 +1,38 @@
+from peachpy import *
+from peachpy.x86_64 import *
+
+import fp16.avx, fp16.avx2
+
+# Argument descriptors shared by both stubs below: a const uint16_t pointer "fp16" and a uint32_t pointer "fp32".
+arg_fp16 = Argument(ptr(const_uint16_t), name="fp16")
+arg_fp32 = Argument(ptr(uint32_t), name="fp32")
+
+with Function("fp16_alt_xmm_to_fp32_ymm_peachpy__avx2", (arg_fp16, arg_fp32), target=uarch.default + isa.avx2):
+
+    reg_fp16 = GeneralPurposeRegister64()
+    LOAD.ARGUMENT(reg_fp16, arg_fp16)
+
+    reg_fp32 = GeneralPurposeRegister64()
+    LOAD.ARGUMENT(reg_fp32, arg_fp32)
+
+    xmm_fp16 = XMMRegister()
+    VMOVUPS(xmm_fp16, [reg_fp16])
+    ymm_fp32 = fp16.avx2.fp16_alt_xmm_to_fp32_ymm(xmm_fp16)
+    VMOVUPS([reg_fp32], ymm_fp32)
+
+    RETURN()
+
+with Function("fp16_alt_xmm_to_fp32_xmm_peachpy__avx", (arg_fp16, arg_fp32), target=uarch.default + isa.avx):
+
+    reg_fp16 = GeneralPurposeRegister64()
+    LOAD.ARGUMENT(reg_fp16, arg_fp16)
+
+    reg_fp32 = GeneralPurposeRegister64()
+    LOAD.ARGUMENT(reg_fp32, arg_fp32)
+
+    xmm_fp16 = XMMRegister()
+    VMOVUPS(xmm_fp16, [reg_fp16])
+    xmm_fp32 = fp16.avx.fp16_alt_xmm_to_fp32_xmm(xmm_fp16)
+    VMOVUPS([reg_fp32], xmm_fp32)
+
+    RETURN()
diff --git a/test/tables.cc b/test/tables.cc
new file mode 100644
index 0000000..5f4e877
--- /dev/null
+++ b/test/tables.cc
@@ -0,0 +1,530 @@
+#include <cstdint>
+
+#include "tables.h"
+
+
+namespace fp16 {
+	const uint32_t normalizedValues[1024] = {  // entry i == 0x3F800000 + (i << 13): binary32 bit patterns in [1.0, 2.0) stepping the top 10 mantissa bits
+		0x3F800000, 0x3F802000, 0x3F804000, 0x3F806000, 0x3F808000, 0x3F80A000, 0x3F80C000, 0x3F80E000,
+		0x3F810000, 0x3F812000, 0x3F814000, 0x3F816000, 0x3F818000, 0x3F81A000, 0x3F81C000, 0x3F81E000,
+		0x3F820000, 0x3F822000, 0x3F824000, 0x3F826000, 0x3F828000, 0x3F82A000, 0x3F82C000, 0x3F82E000,
+		0x3F830000, 0x3F832000, 0x3F834000, 0x3F836000, 0x3F838000, 0x3F83A000, 0x3F83C000, 0x3F83E000,
+		0x3F840000, 0x3F842000, 0x3F844000, 0x3F846000, 0x3F848000, 0x3F84A000, 0x3F84C000, 0x3F84E000,
+		0x3F850000, 0x3F852000, 0x3F854000, 0x3F856000, 0x3F858000, 0x3F85A000, 0x3F85C000, 0x3F85E000,
+		0x3F860000, 0x3F862000, 0x3F864000, 0x3F866000, 0x3F868000, 0x3F86A000, 0x3F86C000, 0x3F86E000,
+		0x3F870000, 0x3F872000, 0x3F874000, 0x3F876000, 0x3F878000, 0x3F87A000, 0x3F87C000, 0x3F87E000,
+		0x3F880000, 0x3F882000, 0x3F884000, 0x3F886000, 0x3F888000, 0x3F88A000, 0x3F88C000, 0x3F88E000,
+		0x3F890000, 0x3F892000, 0x3F894000, 0x3F896000, 0x3F898000, 0x3F89A000, 0x3F89C000, 0x3F89E000,
+		0x3F8A0000, 0x3F8A2000, 0x3F8A4000, 0x3F8A6000, 0x3F8A8000, 0x3F8AA000, 0x3F8AC000, 0x3F8AE000,
+		0x3F8B0000, 0x3F8B2000, 0x3F8B4000, 0x3F8B6000, 0x3F8B8000, 0x3F8BA000, 0x3F8BC000, 0x3F8BE000,
+		0x3F8C0000, 0x3F8C2000, 0x3F8C4000, 0x3F8C6000, 0x3F8C8000, 0x3F8CA000, 0x3F8CC000, 0x3F8CE000,
+		0x3F8D0000, 0x3F8D2000, 0x3F8D4000, 0x3F8D6000, 0x3F8D8000, 0x3F8DA000, 0x3F8DC000, 0x3F8DE000,
+		0x3F8E0000, 0x3F8E2000, 0x3F8E4000, 0x3F8E6000, 0x3F8E8000, 0x3F8EA000, 0x3F8EC000, 0x3F8EE000,
+		0x3F8F0000, 0x3F8F2000, 0x3F8F4000, 0x3F8F6000, 0x3F8F8000, 0x3F8FA000, 0x3F8FC000, 0x3F8FE000,
+		0x3F900000, 0x3F902000, 0x3F904000, 0x3F906000, 0x3F908000, 0x3F90A000, 0x3F90C000, 0x3F90E000,
+		0x3F910000, 0x3F912000, 0x3F914000, 0x3F916000, 0x3F918000, 0x3F91A000, 0x3F91C000, 0x3F91E000,
+		0x3F920000, 0x3F922000, 0x3F924000, 0x3F926000, 0x3F928000, 0x3F92A000, 0x3F92C000, 0x3F92E000,
+		0x3F930000, 0x3F932000, 0x3F934000, 0x3F936000, 0x3F938000, 0x3F93A000, 0x3F93C000, 0x3F93E000,
+		0x3F940000, 0x3F942000, 0x3F944000, 0x3F946000, 0x3F948000, 0x3F94A000, 0x3F94C000, 0x3F94E000,
+		0x3F950000, 0x3F952000, 0x3F954000, 0x3F956000, 0x3F958000, 0x3F95A000, 0x3F95C000, 0x3F95E000,
+		0x3F960000, 0x3F962000, 0x3F964000, 0x3F966000, 0x3F968000, 0x3F96A000, 0x3F96C000, 0x3F96E000,
+		0x3F970000, 0x3F972000, 0x3F974000, 0x3F976000, 0x3F978000, 0x3F97A000, 0x3F97C000, 0x3F97E000,
+		0x3F980000, 0x3F982000, 0x3F984000, 0x3F986000, 0x3F988000, 0x3F98A000, 0x3F98C000, 0x3F98E000,
+		0x3F990000, 0x3F992000, 0x3F994000, 0x3F996000, 0x3F998000, 0x3F99A000, 0x3F99C000, 0x3F99E000,
+		0x3F9A0000, 0x3F9A2000, 0x3F9A4000, 0x3F9A6000, 0x3F9A8000, 0x3F9AA000, 0x3F9AC000, 0x3F9AE000,
+		0x3F9B0000, 0x3F9B2000, 0x3F9B4000, 0x3F9B6000, 0x3F9B8000, 0x3F9BA000, 0x3F9BC000, 0x3F9BE000,
+		0x3F9C0000, 0x3F9C2000, 0x3F9C4000, 0x3F9C6000, 0x3F9C8000, 0x3F9CA000, 0x3F9CC000, 0x3F9CE000,
+		0x3F9D0000, 0x3F9D2000, 0x3F9D4000, 0x3F9D6000, 0x3F9D8000, 0x3F9DA000, 0x3F9DC000, 0x3F9DE000,
+		0x3F9E0000, 0x3F9E2000, 0x3F9E4000, 0x3F9E6000, 0x3F9E8000, 0x3F9EA000, 0x3F9EC000, 0x3F9EE000,
+		0x3F9F0000, 0x3F9F2000, 0x3F9F4000, 0x3F9F6000, 0x3F9F8000, 0x3F9FA000, 0x3F9FC000, 0x3F9FE000,
+		0x3FA00000, 0x3FA02000, 0x3FA04000, 0x3FA06000, 0x3FA08000, 0x3FA0A000, 0x3FA0C000, 0x3FA0E000,
+		0x3FA10000, 0x3FA12000, 0x3FA14000, 0x3FA16000, 0x3FA18000, 0x3FA1A000, 0x3FA1C000, 0x3FA1E000,
+		0x3FA20000, 0x3FA22000, 0x3FA24000, 0x3FA26000, 0x3FA28000, 0x3FA2A000, 0x3FA2C000, 0x3FA2E000,
+		0x3FA30000, 0x3FA32000, 0x3FA34000, 0x3FA36000, 0x3FA38000, 0x3FA3A000, 0x3FA3C000, 0x3FA3E000,
+		0x3FA40000, 0x3FA42000, 0x3FA44000, 0x3FA46000, 0x3FA48000, 0x3FA4A000, 0x3FA4C000, 0x3FA4E000,
+		0x3FA50000, 0x3FA52000, 0x3FA54000, 0x3FA56000, 0x3FA58000, 0x3FA5A000, 0x3FA5C000, 0x3FA5E000,
+		0x3FA60000, 0x3FA62000, 0x3FA64000, 0x3FA66000, 0x3FA68000, 0x3FA6A000, 0x3FA6C000, 0x3FA6E000,
+		0x3FA70000, 0x3FA72000, 0x3FA74000, 0x3FA76000, 0x3FA78000, 0x3FA7A000, 0x3FA7C000, 0x3FA7E000,
+		0x3FA80000, 0x3FA82000, 0x3FA84000, 0x3FA86000, 0x3FA88000, 0x3FA8A000, 0x3FA8C000, 0x3FA8E000,
+		0x3FA90000, 0x3FA92000, 0x3FA94000, 0x3FA96000, 0x3FA98000, 0x3FA9A000, 0x3FA9C000, 0x3FA9E000,
+		0x3FAA0000, 0x3FAA2000, 0x3FAA4000, 0x3FAA6000, 0x3FAA8000, 0x3FAAA000, 0x3FAAC000, 0x3FAAE000,
+		0x3FAB0000, 0x3FAB2000, 0x3FAB4000, 0x3FAB6000, 0x3FAB8000, 0x3FABA000, 0x3FABC000, 0x3FABE000,
+		0x3FAC0000, 0x3FAC2000, 0x3FAC4000, 0x3FAC6000, 0x3FAC8000, 0x3FACA000, 0x3FACC000, 0x3FACE000,
+		0x3FAD0000, 0x3FAD2000, 0x3FAD4000, 0x3FAD6000, 0x3FAD8000, 0x3FADA000, 0x3FADC000, 0x3FADE000,
+		0x3FAE0000, 0x3FAE2000, 0x3FAE4000, 0x3FAE6000, 0x3FAE8000, 0x3FAEA000, 0x3FAEC000, 0x3FAEE000,
+		0x3FAF0000, 0x3FAF2000, 0x3FAF4000, 0x3FAF6000, 0x3FAF8000, 0x3FAFA000, 0x3FAFC000, 0x3FAFE000,
+		0x3FB00000, 0x3FB02000, 0x3FB04000, 0x3FB06000, 0x3FB08000, 0x3FB0A000, 0x3FB0C000, 0x3FB0E000,
+		0x3FB10000, 0x3FB12000, 0x3FB14000, 0x3FB16000, 0x3FB18000, 0x3FB1A000, 0x3FB1C000, 0x3FB1E000,
+		0x3FB20000, 0x3FB22000, 0x3FB24000, 0x3FB26000, 0x3FB28000, 0x3FB2A000, 0x3FB2C000, 0x3FB2E000,
+		0x3FB30000, 0x3FB32000, 0x3FB34000, 0x3FB36000, 0x3FB38000, 0x3FB3A000, 0x3FB3C000, 0x3FB3E000,
+		0x3FB40000, 0x3FB42000, 0x3FB44000, 0x3FB46000, 0x3FB48000, 0x3FB4A000, 0x3FB4C000, 0x3FB4E000,
+		0x3FB50000, 0x3FB52000, 0x3FB54000, 0x3FB56000, 0x3FB58000, 0x3FB5A000, 0x3FB5C000, 0x3FB5E000,
+		0x3FB60000, 0x3FB62000, 0x3FB64000, 0x3FB66000, 0x3FB68000, 0x3FB6A000, 0x3FB6C000, 0x3FB6E000,
+		0x3FB70000, 0x3FB72000, 0x3FB74000, 0x3FB76000, 0x3FB78000, 0x3FB7A000, 0x3FB7C000, 0x3FB7E000,
+		0x3FB80000, 0x3FB82000, 0x3FB84000, 0x3FB86000, 0x3FB88000, 0x3FB8A000, 0x3FB8C000, 0x3FB8E000,
+		0x3FB90000, 0x3FB92000, 0x3FB94000, 0x3FB96000, 0x3FB98000, 0x3FB9A000, 0x3FB9C000, 0x3FB9E000,
+		0x3FBA0000, 0x3FBA2000, 0x3FBA4000, 0x3FBA6000, 0x3FBA8000, 0x3FBAA000, 0x3FBAC000, 0x3FBAE000,
+		0x3FBB0000, 0x3FBB2000, 0x3FBB4000, 0x3FBB6000, 0x3FBB8000, 0x3FBBA000, 0x3FBBC000, 0x3FBBE000,
+		0x3FBC0000, 0x3FBC2000, 0x3FBC4000, 0x3FBC6000, 0x3FBC8000, 0x3FBCA000, 0x3FBCC000, 0x3FBCE000,
+		0x3FBD0000, 0x3FBD2000, 0x3FBD4000, 0x3FBD6000, 0x3FBD8000, 0x3FBDA000, 0x3FBDC000, 0x3FBDE000,
+		0x3FBE0000, 0x3FBE2000, 0x3FBE4000, 0x3FBE6000, 0x3FBE8000, 0x3FBEA000, 0x3FBEC000, 0x3FBEE000,
+		0x3FBF0000, 0x3FBF2000, 0x3FBF4000, 0x3FBF6000, 0x3FBF8000, 0x3FBFA000, 0x3FBFC000, 0x3FBFE000,
+		0x3FC00000, 0x3FC02000, 0x3FC04000, 0x3FC06000, 0x3FC08000, 0x3FC0A000, 0x3FC0C000, 0x3FC0E000,
+		0x3FC10000, 0x3FC12000, 0x3FC14000, 0x3FC16000, 0x3FC18000, 0x3FC1A000, 0x3FC1C000, 0x3FC1E000,
+		0x3FC20000, 0x3FC22000, 0x3FC24000, 0x3FC26000, 0x3FC28000, 0x3FC2A000, 0x3FC2C000, 0x3FC2E000,
+		0x3FC30000, 0x3FC32000, 0x3FC34000, 0x3FC36000, 0x3FC38000, 0x3FC3A000, 0x3FC3C000, 0x3FC3E000,
+		0x3FC40000, 0x3FC42000, 0x3FC44000, 0x3FC46000, 0x3FC48000, 0x3FC4A000, 0x3FC4C000, 0x3FC4E000,
+		0x3FC50000, 0x3FC52000, 0x3FC54000, 0x3FC56000, 0x3FC58000, 0x3FC5A000, 0x3FC5C000, 0x3FC5E000,
+		0x3FC60000, 0x3FC62000, 0x3FC64000, 0x3FC66000, 0x3FC68000, 0x3FC6A000, 0x3FC6C000, 0x3FC6E000,
+		0x3FC70000, 0x3FC72000, 0x3FC74000, 0x3FC76000, 0x3FC78000, 0x3FC7A000, 0x3FC7C000, 0x3FC7E000,
+		0x3FC80000, 0x3FC82000, 0x3FC84000, 0x3FC86000, 0x3FC88000, 0x3FC8A000, 0x3FC8C000, 0x3FC8E000,
+		0x3FC90000, 0x3FC92000, 0x3FC94000, 0x3FC96000, 0x3FC98000, 0x3FC9A000, 0x3FC9C000, 0x3FC9E000,
+		0x3FCA0000, 0x3FCA2000, 0x3FCA4000, 0x3FCA6000, 0x3FCA8000, 0x3FCAA000, 0x3FCAC000, 0x3FCAE000,
+		0x3FCB0000, 0x3FCB2000, 0x3FCB4000, 0x3FCB6000, 0x3FCB8000, 0x3FCBA000, 0x3FCBC000, 0x3FCBE000,
+		0x3FCC0000, 0x3FCC2000, 0x3FCC4000, 0x3FCC6000, 0x3FCC8000, 0x3FCCA000, 0x3FCCC000, 0x3FCCE000,
+		0x3FCD0000, 0x3FCD2000, 0x3FCD4000, 0x3FCD6000, 0x3FCD8000, 0x3FCDA000, 0x3FCDC000, 0x3FCDE000,
+		0x3FCE0000, 0x3FCE2000, 0x3FCE4000, 0x3FCE6000, 0x3FCE8000, 0x3FCEA000, 0x3FCEC000, 0x3FCEE000,
+		0x3FCF0000, 0x3FCF2000, 0x3FCF4000, 0x3FCF6000, 0x3FCF8000, 0x3FCFA000, 0x3FCFC000, 0x3FCFE000,
+		0x3FD00000, 0x3FD02000, 0x3FD04000, 0x3FD06000, 0x3FD08000, 0x3FD0A000, 0x3FD0C000, 0x3FD0E000,
+		0x3FD10000, 0x3FD12000, 0x3FD14000, 0x3FD16000, 0x3FD18000, 0x3FD1A000, 0x3FD1C000, 0x3FD1E000,
+		0x3FD20000, 0x3FD22000, 0x3FD24000, 0x3FD26000, 0x3FD28000, 0x3FD2A000, 0x3FD2C000, 0x3FD2E000,
+		0x3FD30000, 0x3FD32000, 0x3FD34000, 0x3FD36000, 0x3FD38000, 0x3FD3A000, 0x3FD3C000, 0x3FD3E000,
+		0x3FD40000, 0x3FD42000, 0x3FD44000, 0x3FD46000, 0x3FD48000, 0x3FD4A000, 0x3FD4C000, 0x3FD4E000,
+		0x3FD50000, 0x3FD52000, 0x3FD54000, 0x3FD56000, 0x3FD58000, 0x3FD5A000, 0x3FD5C000, 0x3FD5E000,
+		0x3FD60000, 0x3FD62000, 0x3FD64000, 0x3FD66000, 0x3FD68000, 0x3FD6A000, 0x3FD6C000, 0x3FD6E000,
+		0x3FD70000, 0x3FD72000, 0x3FD74000, 0x3FD76000, 0x3FD78000, 0x3FD7A000, 0x3FD7C000, 0x3FD7E000,
+		0x3FD80000, 0x3FD82000, 0x3FD84000, 0x3FD86000, 0x3FD88000, 0x3FD8A000, 0x3FD8C000, 0x3FD8E000,
+		0x3FD90000, 0x3FD92000, 0x3FD94000, 0x3FD96000, 0x3FD98000, 0x3FD9A000, 0x3FD9C000, 0x3FD9E000,
+		0x3FDA0000, 0x3FDA2000, 0x3FDA4000, 0x3FDA6000, 0x3FDA8000, 0x3FDAA000, 0x3FDAC000, 0x3FDAE000,
+		0x3FDB0000, 0x3FDB2000, 0x3FDB4000, 0x3FDB6000, 0x3FDB8000, 0x3FDBA000, 0x3FDBC000, 0x3FDBE000,
+		0x3FDC0000, 0x3FDC2000, 0x3FDC4000, 0x3FDC6000, 0x3FDC8000, 0x3FDCA000, 0x3FDCC000, 0x3FDCE000,
+		0x3FDD0000, 0x3FDD2000, 0x3FDD4000, 0x3FDD6000, 0x3FDD8000, 0x3FDDA000, 0x3FDDC000, 0x3FDDE000,
+		0x3FDE0000, 0x3FDE2000, 0x3FDE4000, 0x3FDE6000, 0x3FDE8000, 0x3FDEA000, 0x3FDEC000, 0x3FDEE000,
+		0x3FDF0000, 0x3FDF2000, 0x3FDF4000, 0x3FDF6000, 0x3FDF8000, 0x3FDFA000, 0x3FDFC000, 0x3FDFE000,
+		0x3FE00000, 0x3FE02000, 0x3FE04000, 0x3FE06000, 0x3FE08000, 0x3FE0A000, 0x3FE0C000, 0x3FE0E000,
+		0x3FE10000, 0x3FE12000, 0x3FE14000, 0x3FE16000, 0x3FE18000, 0x3FE1A000, 0x3FE1C000, 0x3FE1E000,
+		0x3FE20000, 0x3FE22000, 0x3FE24000, 0x3FE26000, 0x3FE28000, 0x3FE2A000, 0x3FE2C000, 0x3FE2E000,
+		0x3FE30000, 0x3FE32000, 0x3FE34000, 0x3FE36000, 0x3FE38000, 0x3FE3A000, 0x3FE3C000, 0x3FE3E000,
+		0x3FE40000, 0x3FE42000, 0x3FE44000, 0x3FE46000, 0x3FE48000, 0x3FE4A000, 0x3FE4C000, 0x3FE4E000,
+		0x3FE50000, 0x3FE52000, 0x3FE54000, 0x3FE56000, 0x3FE58000, 0x3FE5A000, 0x3FE5C000, 0x3FE5E000,
+		0x3FE60000, 0x3FE62000, 0x3FE64000, 0x3FE66000, 0x3FE68000, 0x3FE6A000, 0x3FE6C000, 0x3FE6E000,
+		0x3FE70000, 0x3FE72000, 0x3FE74000, 0x3FE76000, 0x3FE78000, 0x3FE7A000, 0x3FE7C000, 0x3FE7E000,
+		0x3FE80000, 0x3FE82000, 0x3FE84000, 0x3FE86000, 0x3FE88000, 0x3FE8A000, 0x3FE8C000, 0x3FE8E000,
+		0x3FE90000, 0x3FE92000, 0x3FE94000, 0x3FE96000, 0x3FE98000, 0x3FE9A000, 0x3FE9C000, 0x3FE9E000,
+		0x3FEA0000, 0x3FEA2000, 0x3FEA4000, 0x3FEA6000, 0x3FEA8000, 0x3FEAA000, 0x3FEAC000, 0x3FEAE000,
+		0x3FEB0000, 0x3FEB2000, 0x3FEB4000, 0x3FEB6000, 0x3FEB8000, 0x3FEBA000, 0x3FEBC000, 0x3FEBE000,
+		0x3FEC0000, 0x3FEC2000, 0x3FEC4000, 0x3FEC6000, 0x3FEC8000, 0x3FECA000, 0x3FECC000, 0x3FECE000,
+		0x3FED0000, 0x3FED2000, 0x3FED4000, 0x3FED6000, 0x3FED8000, 0x3FEDA000, 0x3FEDC000, 0x3FEDE000,
+		0x3FEE0000, 0x3FEE2000, 0x3FEE4000, 0x3FEE6000, 0x3FEE8000, 0x3FEEA000, 0x3FEEC000, 0x3FEEE000,
+		0x3FEF0000, 0x3FEF2000, 0x3FEF4000, 0x3FEF6000, 0x3FEF8000, 0x3FEFA000, 0x3FEFC000, 0x3FEFE000,
+		0x3FF00000, 0x3FF02000, 0x3FF04000, 0x3FF06000, 0x3FF08000, 0x3FF0A000, 0x3FF0C000, 0x3FF0E000,
+		0x3FF10000, 0x3FF12000, 0x3FF14000, 0x3FF16000, 0x3FF18000, 0x3FF1A000, 0x3FF1C000, 0x3FF1E000,
+		0x3FF20000, 0x3FF22000, 0x3FF24000, 0x3FF26000, 0x3FF28000, 0x3FF2A000, 0x3FF2C000, 0x3FF2E000,
+		0x3FF30000, 0x3FF32000, 0x3FF34000, 0x3FF36000, 0x3FF38000, 0x3FF3A000, 0x3FF3C000, 0x3FF3E000,
+		0x3FF40000, 0x3FF42000, 0x3FF44000, 0x3FF46000, 0x3FF48000, 0x3FF4A000, 0x3FF4C000, 0x3FF4E000,
+		0x3FF50000, 0x3FF52000, 0x3FF54000, 0x3FF56000, 0x3FF58000, 0x3FF5A000, 0x3FF5C000, 0x3FF5E000,
+		0x3FF60000, 0x3FF62000, 0x3FF64000, 0x3FF66000, 0x3FF68000, 0x3FF6A000, 0x3FF6C000, 0x3FF6E000,
+		0x3FF70000, 0x3FF72000, 0x3FF74000, 0x3FF76000, 0x3FF78000, 0x3FF7A000, 0x3FF7C000, 0x3FF7E000,
+		0x3FF80000, 0x3FF82000, 0x3FF84000, 0x3FF86000, 0x3FF88000, 0x3FF8A000, 0x3FF8C000, 0x3FF8E000,
+		0x3FF90000, 0x3FF92000, 0x3FF94000, 0x3FF96000, 0x3FF98000, 0x3FF9A000, 0x3FF9C000, 0x3FF9E000,
+		0x3FFA0000, 0x3FFA2000, 0x3FFA4000, 0x3FFA6000, 0x3FFA8000, 0x3FFAA000, 0x3FFAC000, 0x3FFAE000,
+		0x3FFB0000, 0x3FFB2000, 0x3FFB4000, 0x3FFB6000, 0x3FFB8000, 0x3FFBA000, 0x3FFBC000, 0x3FFBE000,
+		0x3FFC0000, 0x3FFC2000, 0x3FFC4000, 0x3FFC6000, 0x3FFC8000, 0x3FFCA000, 0x3FFCC000, 0x3FFCE000,
+		0x3FFD0000, 0x3FFD2000, 0x3FFD4000, 0x3FFD6000, 0x3FFD8000, 0x3FFDA000, 0x3FFDC000, 0x3FFDE000,
+		0x3FFE0000, 0x3FFE2000, 0x3FFE4000, 0x3FFE6000, 0x3FFE8000, 0x3FFEA000, 0x3FFEC000, 0x3FFEE000,
+		0x3FFF0000, 0x3FFF2000, 0x3FFF4000, 0x3FFF6000, 0x3FFF8000, 0x3FFFA000, 0x3FFFC000, 0x3FFFE000,
+	};
+
+	const uint32_t denormalizedValues[1024] = {  // entry i == binary32 bit pattern of i * 2^-24 (presumably the half-precision subnormal magnitudes - confirm against users)
+		0x00000000, 0x33800000, 0x34000000, 0x34400000, 0x34800000, 0x34A00000, 0x34C00000, 0x34E00000,
+		0x35000000, 0x35100000, 0x35200000, 0x35300000, 0x35400000, 0x35500000, 0x35600000, 0x35700000,
+		0x35800000, 0x35880000, 0x35900000, 0x35980000, 0x35A00000, 0x35A80000, 0x35B00000, 0x35B80000,
+		0x35C00000, 0x35C80000, 0x35D00000, 0x35D80000, 0x35E00000, 0x35E80000, 0x35F00000, 0x35F80000,
+		0x36000000, 0x36040000, 0x36080000, 0x360C0000, 0x36100000, 0x36140000, 0x36180000, 0x361C0000,
+		0x36200000, 0x36240000, 0x36280000, 0x362C0000, 0x36300000, 0x36340000, 0x36380000, 0x363C0000,
+		0x36400000, 0x36440000, 0x36480000, 0x364C0000, 0x36500000, 0x36540000, 0x36580000, 0x365C0000,
+		0x36600000, 0x36640000, 0x36680000, 0x366C0000, 0x36700000, 0x36740000, 0x36780000, 0x367C0000,
+		0x36800000, 0x36820000, 0x36840000, 0x36860000, 0x36880000, 0x368A0000, 0x368C0000, 0x368E0000,
+		0x36900000, 0x36920000, 0x36940000, 0x36960000, 0x36980000, 0x369A0000, 0x369C0000, 0x369E0000,
+		0x36A00000, 0x36A20000, 0x36A40000, 0x36A60000, 0x36A80000, 0x36AA0000, 0x36AC0000, 0x36AE0000,
+		0x36B00000, 0x36B20000, 0x36B40000, 0x36B60000, 0x36B80000, 0x36BA0000, 0x36BC0000, 0x36BE0000,
+		0x36C00000, 0x36C20000, 0x36C40000, 0x36C60000, 0x36C80000, 0x36CA0000, 0x36CC0000, 0x36CE0000,
+		0x36D00000, 0x36D20000, 0x36D40000, 0x36D60000, 0x36D80000, 0x36DA0000, 0x36DC0000, 0x36DE0000,
+		0x36E00000, 0x36E20000, 0x36E40000, 0x36E60000, 0x36E80000, 0x36EA0000, 0x36EC0000, 0x36EE0000,
+		0x36F00000, 0x36F20000, 0x36F40000, 0x36F60000, 0x36F80000, 0x36FA0000, 0x36FC0000, 0x36FE0000,
+		0x37000000, 0x37010000, 0x37020000, 0x37030000, 0x37040000, 0x37050000, 0x37060000, 0x37070000,
+		0x37080000, 0x37090000, 0x370A0000, 0x370B0000, 0x370C0000, 0x370D0000, 0x370E0000, 0x370F0000,
+		0x37100000, 0x37110000, 0x37120000, 0x37130000, 0x37140000, 0x37150000, 0x37160000, 0x37170000,
+		0x37180000, 0x37190000, 0x371A0000, 0x371B0000, 0x371C0000, 0x371D0000, 0x371E0000, 0x371F0000,
+		0x37200000, 0x37210000, 0x37220000, 0x37230000, 0x37240000, 0x37250000, 0x37260000, 0x37270000,
+		0x37280000, 0x37290000, 0x372A0000, 0x372B0000, 0x372C0000, 0x372D0000, 0x372E0000, 0x372F0000,
+		0x37300000, 0x37310000, 0x37320000, 0x37330000, 0x37340000, 0x37350000, 0x37360000, 0x37370000,
+		0x37380000, 0x37390000, 0x373A0000, 0x373B0000, 0x373C0000, 0x373D0000, 0x373E0000, 0x373F0000,
+		0x37400000, 0x37410000, 0x37420000, 0x37430000, 0x37440000, 0x37450000, 0x37460000, 0x37470000,
+		0x37480000, 0x37490000, 0x374A0000, 0x374B0000, 0x374C0000, 0x374D0000, 0x374E0000, 0x374F0000,
+		0x37500000, 0x37510000, 0x37520000, 0x37530000, 0x37540000, 0x37550000, 0x37560000, 0x37570000,
+		0x37580000, 0x37590000, 0x375A0000, 0x375B0000, 0x375C0000, 0x375D0000, 0x375E0000, 0x375F0000,
+		0x37600000, 0x37610000, 0x37620000, 0x37630000, 0x37640000, 0x37650000, 0x37660000, 0x37670000,
+		0x37680000, 0x37690000, 0x376A0000, 0x376B0000, 0x376C0000, 0x376D0000, 0x376E0000, 0x376F0000,
+		0x37700000, 0x37710000, 0x37720000, 0x37730000, 0x37740000, 0x37750000, 0x37760000, 0x37770000,
+		0x37780000, 0x37790000, 0x377A0000, 0x377B0000, 0x377C0000, 0x377D0000, 0x377E0000, 0x377F0000,
+		0x37800000, 0x37808000, 0x37810000, 0x37818000, 0x37820000, 0x37828000, 0x37830000, 0x37838000,
+		0x37840000, 0x37848000, 0x37850000, 0x37858000, 0x37860000, 0x37868000, 0x37870000, 0x37878000,
+		0x37880000, 0x37888000, 0x37890000, 0x37898000, 0x378A0000, 0x378A8000, 0x378B0000, 0x378B8000,
+		0x378C0000, 0x378C8000, 0x378D0000, 0x378D8000, 0x378E0000, 0x378E8000, 0x378F0000, 0x378F8000,
+		0x37900000, 0x37908000, 0x37910000, 0x37918000, 0x37920000, 0x37928000, 0x37930000, 0x37938000,
+		0x37940000, 0x37948000, 0x37950000, 0x37958000, 0x37960000, 0x37968000, 0x37970000, 0x37978000,
+		0x37980000, 0x37988000, 0x37990000, 0x37998000, 0x379A0000, 0x379A8000, 0x379B0000, 0x379B8000,
+		0x379C0000, 0x379C8000, 0x379D0000, 0x379D8000, 0x379E0000, 0x379E8000, 0x379F0000, 0x379F8000,
+		0x37A00000, 0x37A08000, 0x37A10000, 0x37A18000, 0x37A20000, 0x37A28000, 0x37A30000, 0x37A38000,
+		0x37A40000, 0x37A48000, 0x37A50000, 0x37A58000, 0x37A60000, 0x37A68000, 0x37A70000, 0x37A78000,
+		0x37A80000, 0x37A88000, 0x37A90000, 0x37A98000, 0x37AA0000, 0x37AA8000, 0x37AB0000, 0x37AB8000,
+		0x37AC0000, 0x37AC8000, 0x37AD0000, 0x37AD8000, 0x37AE0000, 0x37AE8000, 0x37AF0000, 0x37AF8000,
+		0x37B00000, 0x37B08000, 0x37B10000, 0x37B18000, 0x37B20000, 0x37B28000, 0x37B30000, 0x37B38000,
+		0x37B40000, 0x37B48000, 0x37B50000, 0x37B58000, 0x37B60000, 0x37B68000, 0x37B70000, 0x37B78000,
+		0x37B80000, 0x37B88000, 0x37B90000, 0x37B98000, 0x37BA0000, 0x37BA8000, 0x37BB0000, 0x37BB8000,
+		0x37BC0000, 0x37BC8000, 0x37BD0000, 0x37BD8000, 0x37BE0000, 0x37BE8000, 0x37BF0000, 0x37BF8000,
+		0x37C00000, 0x37C08000, 0x37C10000, 0x37C18000, 0x37C20000, 0x37C28000, 0x37C30000, 0x37C38000,
+		0x37C40000, 0x37C48000, 0x37C50000, 0x37C58000, 0x37C60000, 0x37C68000, 0x37C70000, 0x37C78000,
+		0x37C80000, 0x37C88000, 0x37C90000, 0x37C98000, 0x37CA0000, 0x37CA8000, 0x37CB0000, 0x37CB8000,
+		0x37CC0000, 0x37CC8000, 0x37CD0000, 0x37CD8000, 0x37CE0000, 0x37CE8000, 0x37CF0000, 0x37CF8000,
+		0x37D00000, 0x37D08000, 0x37D10000, 0x37D18000, 0x37D20000, 0x37D28000, 0x37D30000, 0x37D38000,
+		0x37D40000, 0x37D48000, 0x37D50000, 0x37D58000, 0x37D60000, 0x37D68000, 0x37D70000, 0x37D78000,
+		0x37D80000, 0x37D88000, 0x37D90000, 0x37D98000, 0x37DA0000, 0x37DA8000, 0x37DB0000, 0x37DB8000,
+		0x37DC0000, 0x37DC8000, 0x37DD0000, 0x37DD8000, 0x37DE0000, 0x37DE8000, 0x37DF0000, 0x37DF8000,
+		0x37E00000, 0x37E08000, 0x37E10000, 0x37E18000, 0x37E20000, 0x37E28000, 0x37E30000, 0x37E38000,
+		0x37E40000, 0x37E48000, 0x37E50000, 0x37E58000, 0x37E60000, 0x37E68000, 0x37E70000, 0x37E78000,
+		0x37E80000, 0x37E88000, 0x37E90000, 0x37E98000, 0x37EA0000, 0x37EA8000, 0x37EB0000, 0x37EB8000,
+		0x37EC0000, 0x37EC8000, 0x37ED0000, 0x37ED8000, 0x37EE0000, 0x37EE8000, 0x37EF0000, 0x37EF8000,
+		0x37F00000, 0x37F08000, 0x37F10000, 0x37F18000, 0x37F20000, 0x37F28000, 0x37F30000, 0x37F38000,
+		0x37F40000, 0x37F48000, 0x37F50000, 0x37F58000, 0x37F60000, 0x37F68000, 0x37F70000, 0x37F78000,
+		0x37F80000, 0x37F88000, 0x37F90000, 0x37F98000, 0x37FA0000, 0x37FA8000, 0x37FB0000, 0x37FB8000,
+		0x37FC0000, 0x37FC8000, 0x37FD0000, 0x37FD8000, 0x37FE0000, 0x37FE8000, 0x37FF0000, 0x37FF8000,
+		0x38000000, 0x38004000, 0x38008000, 0x3800C000, 0x38010000, 0x38014000, 0x38018000, 0x3801C000,
+		0x38020000, 0x38024000, 0x38028000, 0x3802C000, 0x38030000, 0x38034000, 0x38038000, 0x3803C000,
+		0x38040000, 0x38044000, 0x38048000, 0x3804C000, 0x38050000, 0x38054000, 0x38058000, 0x3805C000,
+		0x38060000, 0x38064000, 0x38068000, 0x3806C000, 0x38070000, 0x38074000, 0x38078000, 0x3807C000,
+		0x38080000, 0x38084000, 0x38088000, 0x3808C000, 0x38090000, 0x38094000, 0x38098000, 0x3809C000,
+		0x380A0000, 0x380A4000, 0x380A8000, 0x380AC000, 0x380B0000, 0x380B4000, 0x380B8000, 0x380BC000,
+		0x380C0000, 0x380C4000, 0x380C8000, 0x380CC000, 0x380D0000, 0x380D4000, 0x380D8000, 0x380DC000,
+		0x380E0000, 0x380E4000, 0x380E8000, 0x380EC000, 0x380F0000, 0x380F4000, 0x380F8000, 0x380FC000,
+		0x38100000, 0x38104000, 0x38108000, 0x3810C000, 0x38110000, 0x38114000, 0x38118000, 0x3811C000,
+		0x38120000, 0x38124000, 0x38128000, 0x3812C000, 0x38130000, 0x38134000, 0x38138000, 0x3813C000,
+		0x38140000, 0x38144000, 0x38148000, 0x3814C000, 0x38150000, 0x38154000, 0x38158000, 0x3815C000,
+		0x38160000, 0x38164000, 0x38168000, 0x3816C000, 0x38170000, 0x38174000, 0x38178000, 0x3817C000,
+		0x38180000, 0x38184000, 0x38188000, 0x3818C000, 0x38190000, 0x38194000, 0x38198000, 0x3819C000,
+		0x381A0000, 0x381A4000, 0x381A8000, 0x381AC000, 0x381B0000, 0x381B4000, 0x381B8000, 0x381BC000,
+		0x381C0000, 0x381C4000, 0x381C8000, 0x381CC000, 0x381D0000, 0x381D4000, 0x381D8000, 0x381DC000,
+		0x381E0000, 0x381E4000, 0x381E8000, 0x381EC000, 0x381F0000, 0x381F4000, 0x381F8000, 0x381FC000,
+		0x38200000, 0x38204000, 0x38208000, 0x3820C000, 0x38210000, 0x38214000, 0x38218000, 0x3821C000,
+		0x38220000, 0x38224000, 0x38228000, 0x3822C000, 0x38230000, 0x38234000, 0x38238000, 0x3823C000,
+		0x38240000, 0x38244000, 0x38248000, 0x3824C000, 0x38250000, 0x38254000, 0x38258000, 0x3825C000,
+		0x38260000, 0x38264000, 0x38268000, 0x3826C000, 0x38270000, 0x38274000, 0x38278000, 0x3827C000,
+		0x38280000, 0x38284000, 0x38288000, 0x3828C000, 0x38290000, 0x38294000, 0x38298000, 0x3829C000,
+		0x382A0000, 0x382A4000, 0x382A8000, 0x382AC000, 0x382B0000, 0x382B4000, 0x382B8000, 0x382BC000,
+		0x382C0000, 0x382C4000, 0x382C8000, 0x382CC000, 0x382D0000, 0x382D4000, 0x382D8000, 0x382DC000,
+		0x382E0000, 0x382E4000, 0x382E8000, 0x382EC000, 0x382F0000, 0x382F4000, 0x382F8000, 0x382FC000,
+		0x38300000, 0x38304000, 0x38308000, 0x3830C000, 0x38310000, 0x38314000, 0x38318000, 0x3831C000,
+		0x38320000, 0x38324000, 0x38328000, 0x3832C000, 0x38330000, 0x38334000, 0x38338000, 0x3833C000,
+		0x38340000, 0x38344000, 0x38348000, 0x3834C000, 0x38350000, 0x38354000, 0x38358000, 0x3835C000,
+		0x38360000, 0x38364000, 0x38368000, 0x3836C000, 0x38370000, 0x38374000, 0x38378000, 0x3837C000,
+		0x38380000, 0x38384000, 0x38388000, 0x3838C000, 0x38390000, 0x38394000, 0x38398000, 0x3839C000,
+		0x383A0000, 0x383A4000, 0x383A8000, 0x383AC000, 0x383B0000, 0x383B4000, 0x383B8000, 0x383BC000,
+		0x383C0000, 0x383C4000, 0x383C8000, 0x383CC000, 0x383D0000, 0x383D4000, 0x383D8000, 0x383DC000,
+		0x383E0000, 0x383E4000, 0x383E8000, 0x383EC000, 0x383F0000, 0x383F4000, 0x383F8000, 0x383FC000,
+		0x38400000, 0x38404000, 0x38408000, 0x3840C000, 0x38410000, 0x38414000, 0x38418000, 0x3841C000,
+		0x38420000, 0x38424000, 0x38428000, 0x3842C000, 0x38430000, 0x38434000, 0x38438000, 0x3843C000,
+		0x38440000, 0x38444000, 0x38448000, 0x3844C000, 0x38450000, 0x38454000, 0x38458000, 0x3845C000,
+		0x38460000, 0x38464000, 0x38468000, 0x3846C000, 0x38470000, 0x38474000, 0x38478000, 0x3847C000,
+		0x38480000, 0x38484000, 0x38488000, 0x3848C000, 0x38490000, 0x38494000, 0x38498000, 0x3849C000,
+		0x384A0000, 0x384A4000, 0x384A8000, 0x384AC000, 0x384B0000, 0x384B4000, 0x384B8000, 0x384BC000,
+		0x384C0000, 0x384C4000, 0x384C8000, 0x384CC000, 0x384D0000, 0x384D4000, 0x384D8000, 0x384DC000,
+		0x384E0000, 0x384E4000, 0x384E8000, 0x384EC000, 0x384F0000, 0x384F4000, 0x384F8000, 0x384FC000,
+		0x38500000, 0x38504000, 0x38508000, 0x3850C000, 0x38510000, 0x38514000, 0x38518000, 0x3851C000,
+		0x38520000, 0x38524000, 0x38528000, 0x3852C000, 0x38530000, 0x38534000, 0x38538000, 0x3853C000,
+		0x38540000, 0x38544000, 0x38548000, 0x3854C000, 0x38550000, 0x38554000, 0x38558000, 0x3855C000,
+		0x38560000, 0x38564000, 0x38568000, 0x3856C000, 0x38570000, 0x38574000, 0x38578000, 0x3857C000,
+		0x38580000, 0x38584000, 0x38588000, 0x3858C000, 0x38590000, 0x38594000, 0x38598000, 0x3859C000,
+		0x385A0000, 0x385A4000, 0x385A8000, 0x385AC000, 0x385B0000, 0x385B4000, 0x385B8000, 0x385BC000,
+		0x385C0000, 0x385C4000, 0x385C8000, 0x385CC000, 0x385D0000, 0x385D4000, 0x385D8000, 0x385DC000,
+		0x385E0000, 0x385E4000, 0x385E8000, 0x385EC000, 0x385F0000, 0x385F4000, 0x385F8000, 0x385FC000,
+		0x38600000, 0x38604000, 0x38608000, 0x3860C000, 0x38610000, 0x38614000, 0x38618000, 0x3861C000,
+		0x38620000, 0x38624000, 0x38628000, 0x3862C000, 0x38630000, 0x38634000, 0x38638000, 0x3863C000,
+		0x38640000, 0x38644000, 0x38648000, 0x3864C000, 0x38650000, 0x38654000, 0x38658000, 0x3865C000,
+		0x38660000, 0x38664000, 0x38668000, 0x3866C000, 0x38670000, 0x38674000, 0x38678000, 0x3867C000,
+		0x38680000, 0x38684000, 0x38688000, 0x3868C000, 0x38690000, 0x38694000, 0x38698000, 0x3869C000,
+		0x386A0000, 0x386A4000, 0x386A8000, 0x386AC000, 0x386B0000, 0x386B4000, 0x386B8000, 0x386BC000,
+		0x386C0000, 0x386C4000, 0x386C8000, 0x386CC000, 0x386D0000, 0x386D4000, 0x386D8000, 0x386DC000,
+		0x386E0000, 0x386E4000, 0x386E8000, 0x386EC000, 0x386F0000, 0x386F4000, 0x386F8000, 0x386FC000,
+		0x38700000, 0x38704000, 0x38708000, 0x3870C000, 0x38710000, 0x38714000, 0x38718000, 0x3871C000,
+		0x38720000, 0x38724000, 0x38728000, 0x3872C000, 0x38730000, 0x38734000, 0x38738000, 0x3873C000,
+		0x38740000, 0x38744000, 0x38748000, 0x3874C000, 0x38750000, 0x38754000, 0x38758000, 0x3875C000,
+		0x38760000, 0x38764000, 0x38768000, 0x3876C000, 0x38770000, 0x38774000, 0x38778000, 0x3877C000,
+		0x38780000, 0x38784000, 0x38788000, 0x3878C000, 0x38790000, 0x38794000, 0x38798000, 0x3879C000,
+		0x387A0000, 0x387A4000, 0x387A8000, 0x387AC000, 0x387B0000, 0x387B4000, 0x387B8000, 0x387BC000,
+		0x387C0000, 0x387C4000, 0x387C8000, 0x387CC000, 0x387D0000, 0x387D4000, 0x387D8000, 0x387DC000,
+		0x387E0000, 0x387E4000, 0x387E8000, 0x387EC000, 0x387F0000, 0x387F4000, 0x387F8000, 0x387FC000,
+	};
+
+	const uint32_t normalizedRanges[1024] = {
+		0x3F801001, 0x3F803000, 0x3F805001, 0x3F807000, 0x3F809001, 0x3F80B000, 0x3F80D001, 0x3F80F000,
+		0x3F811001, 0x3F813000, 0x3F815001, 0x3F817000, 0x3F819001, 0x3F81B000, 0x3F81D001, 0x3F81F000,
+		0x3F821001, 0x3F823000, 0x3F825001, 0x3F827000, 0x3F829001, 0x3F82B000, 0x3F82D001, 0x3F82F000,
+		0x3F831001, 0x3F833000, 0x3F835001, 0x3F837000, 0x3F839001, 0x3F83B000, 0x3F83D001, 0x3F83F000,
+		0x3F841001, 0x3F843000, 0x3F845001, 0x3F847000, 0x3F849001, 0x3F84B000, 0x3F84D001, 0x3F84F000,
+		0x3F851001, 0x3F853000, 0x3F855001, 0x3F857000, 0x3F859001, 0x3F85B000, 0x3F85D001, 0x3F85F000,
+		0x3F861001, 0x3F863000, 0x3F865001, 0x3F867000, 0x3F869001, 0x3F86B000, 0x3F86D001, 0x3F86F000,
+		0x3F871001, 0x3F873000, 0x3F875001, 0x3F877000, 0x3F879001, 0x3F87B000, 0x3F87D001, 0x3F87F000,
+		0x3F881001, 0x3F883000, 0x3F885001, 0x3F887000, 0x3F889001, 0x3F88B000, 0x3F88D001, 0x3F88F000,
+		0x3F891001, 0x3F893000, 0x3F895001, 0x3F897000, 0x3F899001, 0x3F89B000, 0x3F89D001, 0x3F89F000,
+		0x3F8A1001, 0x3F8A3000, 0x3F8A5001, 0x3F8A7000, 0x3F8A9001, 0x3F8AB000, 0x3F8AD001, 0x3F8AF000,
+		0x3F8B1001, 0x3F8B3000, 0x3F8B5001, 0x3F8B7000, 0x3F8B9001, 0x3F8BB000, 0x3F8BD001, 0x3F8BF000,
+		0x3F8C1001, 0x3F8C3000, 0x3F8C5001, 0x3F8C7000, 0x3F8C9001, 0x3F8CB000, 0x3F8CD001, 0x3F8CF000,
+		0x3F8D1001, 0x3F8D3000, 0x3F8D5001, 0x3F8D7000, 0x3F8D9001, 0x3F8DB000, 0x3F8DD001, 0x3F8DF000,
+		0x3F8E1001, 0x3F8E3000, 0x3F8E5001, 0x3F8E7000, 0x3F8E9001, 0x3F8EB000, 0x3F8ED001, 0x3F8EF000,
+		0x3F8F1001, 0x3F8F3000, 0x3F8F5001, 0x3F8F7000, 0x3F8F9001, 0x3F8FB000, 0x3F8FD001, 0x3F8FF000,
+		0x3F901001, 0x3F903000, 0x3F905001, 0x3F907000, 0x3F909001, 0x3F90B000, 0x3F90D001, 0x3F90F000,
+		0x3F911001, 0x3F913000, 0x3F915001, 0x3F917000, 0x3F919001, 0x3F91B000, 0x3F91D001, 0x3F91F000,
+		0x3F921001, 0x3F923000, 0x3F925001, 0x3F927000, 0x3F929001, 0x3F92B000, 0x3F92D001, 0x3F92F000,
+		0x3F931001, 0x3F933000, 0x3F935001, 0x3F937000, 0x3F939001, 0x3F93B000, 0x3F93D001, 0x3F93F000,
+		0x3F941001, 0x3F943000, 0x3F945001, 0x3F947000, 0x3F949001, 0x3F94B000, 0x3F94D001, 0x3F94F000,
+		0x3F951001, 0x3F953000, 0x3F955001, 0x3F957000, 0x3F959001, 0x3F95B000, 0x3F95D001, 0x3F95F000,
+		0x3F961001, 0x3F963000, 0x3F965001, 0x3F967000, 0x3F969001, 0x3F96B000, 0x3F96D001, 0x3F96F000,
+		0x3F971001, 0x3F973000, 0x3F975001, 0x3F977000, 0x3F979001, 0x3F97B000, 0x3F97D001, 0x3F97F000,
+		0x3F981001, 0x3F983000, 0x3F985001, 0x3F987000, 0x3F989001, 0x3F98B000, 0x3F98D001, 0x3F98F000,
+		0x3F991001, 0x3F993000, 0x3F995001, 0x3F997000, 0x3F999001, 0x3F99B000, 0x3F99D001, 0x3F99F000,
+		0x3F9A1001, 0x3F9A3000, 0x3F9A5001, 0x3F9A7000, 0x3F9A9001, 0x3F9AB000, 0x3F9AD001, 0x3F9AF000,
+		0x3F9B1001, 0x3F9B3000, 0x3F9B5001, 0x3F9B7000, 0x3F9B9001, 0x3F9BB000, 0x3F9BD001, 0x3F9BF000,
+		0x3F9C1001, 0x3F9C3000, 0x3F9C5001, 0x3F9C7000, 0x3F9C9001, 0x3F9CB000, 0x3F9CD001, 0x3F9CF000,
+		0x3F9D1001, 0x3F9D3000, 0x3F9D5001, 0x3F9D7000, 0x3F9D9001, 0x3F9DB000, 0x3F9DD001, 0x3F9DF000,
+		0x3F9E1001, 0x3F9E3000, 0x3F9E5001, 0x3F9E7000, 0x3F9E9001, 0x3F9EB000, 0x3F9ED001, 0x3F9EF000,
+		0x3F9F1001, 0x3F9F3000, 0x3F9F5001, 0x3F9F7000, 0x3F9F9001, 0x3F9FB000, 0x3F9FD001, 0x3F9FF000,
+		0x3FA01001, 0x3FA03000, 0x3FA05001, 0x3FA07000, 0x3FA09001, 0x3FA0B000, 0x3FA0D001, 0x3FA0F000,
+		0x3FA11001, 0x3FA13000, 0x3FA15001, 0x3FA17000, 0x3FA19001, 0x3FA1B000, 0x3FA1D001, 0x3FA1F000,
+		0x3FA21001, 0x3FA23000, 0x3FA25001, 0x3FA27000, 0x3FA29001, 0x3FA2B000, 0x3FA2D001, 0x3FA2F000,
+		0x3FA31001, 0x3FA33000, 0x3FA35001, 0x3FA37000, 0x3FA39001, 0x3FA3B000, 0x3FA3D001, 0x3FA3F000,
+		0x3FA41001, 0x3FA43000, 0x3FA45001, 0x3FA47000, 0x3FA49001, 0x3FA4B000, 0x3FA4D001, 0x3FA4F000,
+		0x3FA51001, 0x3FA53000, 0x3FA55001, 0x3FA57000, 0x3FA59001, 0x3FA5B000, 0x3FA5D001, 0x3FA5F000,
+		0x3FA61001, 0x3FA63000, 0x3FA65001, 0x3FA67000, 0x3FA69001, 0x3FA6B000, 0x3FA6D001, 0x3FA6F000,
+		0x3FA71001, 0x3FA73000, 0x3FA75001, 0x3FA77000, 0x3FA79001, 0x3FA7B000, 0x3FA7D001, 0x3FA7F000,
+		0x3FA81001, 0x3FA83000, 0x3FA85001, 0x3FA87000, 0x3FA89001, 0x3FA8B000, 0x3FA8D001, 0x3FA8F000,
+		0x3FA91001, 0x3FA93000, 0x3FA95001, 0x3FA97000, 0x3FA99001, 0x3FA9B000, 0x3FA9D001, 0x3FA9F000,
+		0x3FAA1001, 0x3FAA3000, 0x3FAA5001, 0x3FAA7000, 0x3FAA9001, 0x3FAAB000, 0x3FAAD001, 0x3FAAF000,
+		0x3FAB1001, 0x3FAB3000, 0x3FAB5001, 0x3FAB7000, 0x3FAB9001, 0x3FABB000, 0x3FABD001, 0x3FABF000,
+		0x3FAC1001, 0x3FAC3000, 0x3FAC5001, 0x3FAC7000, 0x3FAC9001, 0x3FACB000, 0x3FACD001, 0x3FACF000,
+		0x3FAD1001, 0x3FAD3000, 0x3FAD5001, 0x3FAD7000, 0x3FAD9001, 0x3FADB000, 0x3FADD001, 0x3FADF000,
+		0x3FAE1001, 0x3FAE3000, 0x3FAE5001, 0x3FAE7000, 0x3FAE9001, 0x3FAEB000, 0x3FAED001, 0x3FAEF000,
+		0x3FAF1001, 0x3FAF3000, 0x3FAF5001, 0x3FAF7000, 0x3FAF9001, 0x3FAFB000, 0x3FAFD001, 0x3FAFF000,
+		0x3FB01001, 0x3FB03000, 0x3FB05001, 0x3FB07000, 0x3FB09001, 0x3FB0B000, 0x3FB0D001, 0x3FB0F000,
+		0x3FB11001, 0x3FB13000, 0x3FB15001, 0x3FB17000, 0x3FB19001, 0x3FB1B000, 0x3FB1D001, 0x3FB1F000,
+		0x3FB21001, 0x3FB23000, 0x3FB25001, 0x3FB27000, 0x3FB29001, 0x3FB2B000, 0x3FB2D001, 0x3FB2F000,
+		0x3FB31001, 0x3FB33000, 0x3FB35001, 0x3FB37000, 0x3FB39001, 0x3FB3B000, 0x3FB3D001, 0x3FB3F000,
+		0x3FB41001, 0x3FB43000, 0x3FB45001, 0x3FB47000, 0x3FB49001, 0x3FB4B000, 0x3FB4D001, 0x3FB4F000,
+		0x3FB51001, 0x3FB53000, 0x3FB55001, 0x3FB57000, 0x3FB59001, 0x3FB5B000, 0x3FB5D001, 0x3FB5F000,
+		0x3FB61001, 0x3FB63000, 0x3FB65001, 0x3FB67000, 0x3FB69001, 0x3FB6B000, 0x3FB6D001, 0x3FB6F000,
+		0x3FB71001, 0x3FB73000, 0x3FB75001, 0x3FB77000, 0x3FB79001, 0x3FB7B000, 0x3FB7D001, 0x3FB7F000,
+		0x3FB81001, 0x3FB83000, 0x3FB85001, 0x3FB87000, 0x3FB89001, 0x3FB8B000, 0x3FB8D001, 0x3FB8F000,
+		0x3FB91001, 0x3FB93000, 0x3FB95001, 0x3FB97000, 0x3FB99001, 0x3FB9B000, 0x3FB9D001, 0x3FB9F000,
+		0x3FBA1001, 0x3FBA3000, 0x3FBA5001, 0x3FBA7000, 0x3FBA9001, 0x3FBAB000, 0x3FBAD001, 0x3FBAF000,
+		0x3FBB1001, 0x3FBB3000, 0x3FBB5001, 0x3FBB7000, 0x3FBB9001, 0x3FBBB000, 0x3FBBD001, 0x3FBBF000,
+		0x3FBC1001, 0x3FBC3000, 0x3FBC5001, 0x3FBC7000, 0x3FBC9001, 0x3FBCB000, 0x3FBCD001, 0x3FBCF000,
+		0x3FBD1001, 0x3FBD3000, 0x3FBD5001, 0x3FBD7000, 0x3FBD9001, 0x3FBDB000, 0x3FBDD001, 0x3FBDF000,
+		0x3FBE1001, 0x3FBE3000, 0x3FBE5001, 0x3FBE7000, 0x3FBE9001, 0x3FBEB000, 0x3FBED001, 0x3FBEF000,
+		0x3FBF1001, 0x3FBF3000, 0x3FBF5001, 0x3FBF7000, 0x3FBF9001, 0x3FBFB000, 0x3FBFD001, 0x3FBFF000,
+		0x3FC01001, 0x3FC03000, 0x3FC05001, 0x3FC07000, 0x3FC09001, 0x3FC0B000, 0x3FC0D001, 0x3FC0F000,
+		0x3FC11001, 0x3FC13000, 0x3FC15001, 0x3FC17000, 0x3FC19001, 0x3FC1B000, 0x3FC1D001, 0x3FC1F000,
+		0x3FC21001, 0x3FC23000, 0x3FC25001, 0x3FC27000, 0x3FC29001, 0x3FC2B000, 0x3FC2D001, 0x3FC2F000,
+		0x3FC31001, 0x3FC33000, 0x3FC35001, 0x3FC37000, 0x3FC39001, 0x3FC3B000, 0x3FC3D001, 0x3FC3F000,
+		0x3FC41001, 0x3FC43000, 0x3FC45001, 0x3FC47000, 0x3FC49001, 0x3FC4B000, 0x3FC4D001, 0x3FC4F000,
+		0x3FC51001, 0x3FC53000, 0x3FC55001, 0x3FC57000, 0x3FC59001, 0x3FC5B000, 0x3FC5D001, 0x3FC5F000,
+		0x3FC61001, 0x3FC63000, 0x3FC65001, 0x3FC67000, 0x3FC69001, 0x3FC6B000, 0x3FC6D001, 0x3FC6F000,
+		0x3FC71001, 0x3FC73000, 0x3FC75001, 0x3FC77000, 0x3FC79001, 0x3FC7B000, 0x3FC7D001, 0x3FC7F000,
+		0x3FC81001, 0x3FC83000, 0x3FC85001, 0x3FC87000, 0x3FC89001, 0x3FC8B000, 0x3FC8D001, 0x3FC8F000,
+		0x3FC91001, 0x3FC93000, 0x3FC95001, 0x3FC97000, 0x3FC99001, 0x3FC9B000, 0x3FC9D001, 0x3FC9F000,
+		0x3FCA1001, 0x3FCA3000, 0x3FCA5001, 0x3FCA7000, 0x3FCA9001, 0x3FCAB000, 0x3FCAD001, 0x3FCAF000,
+		0x3FCB1001, 0x3FCB3000, 0x3FCB5001, 0x3FCB7000, 0x3FCB9001, 0x3FCBB000, 0x3FCBD001, 0x3FCBF000,
+		0x3FCC1001, 0x3FCC3000, 0x3FCC5001, 0x3FCC7000, 0x3FCC9001, 0x3FCCB000, 0x3FCCD001, 0x3FCCF000,
+		0x3FCD1001, 0x3FCD3000, 0x3FCD5001, 0x3FCD7000, 0x3FCD9001, 0x3FCDB000, 0x3FCDD001, 0x3FCDF000,
+		0x3FCE1001, 0x3FCE3000, 0x3FCE5001, 0x3FCE7000, 0x3FCE9001, 0x3FCEB000, 0x3FCED001, 0x3FCEF000,
+		0x3FCF1001, 0x3FCF3000, 0x3FCF5001, 0x3FCF7000, 0x3FCF9001, 0x3FCFB000, 0x3FCFD001, 0x3FCFF000,
+		0x3FD01001, 0x3FD03000, 0x3FD05001, 0x3FD07000, 0x3FD09001, 0x3FD0B000, 0x3FD0D001, 0x3FD0F000,
+		0x3FD11001, 0x3FD13000, 0x3FD15001, 0x3FD17000, 0x3FD19001, 0x3FD1B000, 0x3FD1D001, 0x3FD1F000,
+		0x3FD21001, 0x3FD23000, 0x3FD25001, 0x3FD27000, 0x3FD29001, 0x3FD2B000, 0x3FD2D001, 0x3FD2F000,
+		0x3FD31001, 0x3FD33000, 0x3FD35001, 0x3FD37000, 0x3FD39001, 0x3FD3B000, 0x3FD3D001, 0x3FD3F000,
+		0x3FD41001, 0x3FD43000, 0x3FD45001, 0x3FD47000, 0x3FD49001, 0x3FD4B000, 0x3FD4D001, 0x3FD4F000,
+		0x3FD51001, 0x3FD53000, 0x3FD55001, 0x3FD57000, 0x3FD59001, 0x3FD5B000, 0x3FD5D001, 0x3FD5F000,
+		0x3FD61001, 0x3FD63000, 0x3FD65001, 0x3FD67000, 0x3FD69001, 0x3FD6B000, 0x3FD6D001, 0x3FD6F000,
+		0x3FD71001, 0x3FD73000, 0x3FD75001, 0x3FD77000, 0x3FD79001, 0x3FD7B000, 0x3FD7D001, 0x3FD7F000,
+		0x3FD81001, 0x3FD83000, 0x3FD85001, 0x3FD87000, 0x3FD89001, 0x3FD8B000, 0x3FD8D001, 0x3FD8F000,
+		0x3FD91001, 0x3FD93000, 0x3FD95001, 0x3FD97000, 0x3FD99001, 0x3FD9B000, 0x3FD9D001, 0x3FD9F000,
+		0x3FDA1001, 0x3FDA3000, 0x3FDA5001, 0x3FDA7000, 0x3FDA9001, 0x3FDAB000, 0x3FDAD001, 0x3FDAF000,
+		0x3FDB1001, 0x3FDB3000, 0x3FDB5001, 0x3FDB7000, 0x3FDB9001, 0x3FDBB000, 0x3FDBD001, 0x3FDBF000,
+		0x3FDC1001, 0x3FDC3000, 0x3FDC5001, 0x3FDC7000, 0x3FDC9001, 0x3FDCB000, 0x3FDCD001, 0x3FDCF000,
+		0x3FDD1001, 0x3FDD3000, 0x3FDD5001, 0x3FDD7000, 0x3FDD9001, 0x3FDDB000, 0x3FDDD001, 0x3FDDF000,
+		0x3FDE1001, 0x3FDE3000, 0x3FDE5001, 0x3FDE7000, 0x3FDE9001, 0x3FDEB000, 0x3FDED001, 0x3FDEF000,
+		0x3FDF1001, 0x3FDF3000, 0x3FDF5001, 0x3FDF7000, 0x3FDF9001, 0x3FDFB000, 0x3FDFD001, 0x3FDFF000,
+		0x3FE01001, 0x3FE03000, 0x3FE05001, 0x3FE07000, 0x3FE09001, 0x3FE0B000, 0x3FE0D001, 0x3FE0F000,
+		0x3FE11001, 0x3FE13000, 0x3FE15001, 0x3FE17000, 0x3FE19001, 0x3FE1B000, 0x3FE1D001, 0x3FE1F000,
+		0x3FE21001, 0x3FE23000, 0x3FE25001, 0x3FE27000, 0x3FE29001, 0x3FE2B000, 0x3FE2D001, 0x3FE2F000,
+		0x3FE31001, 0x3FE33000, 0x3FE35001, 0x3FE37000, 0x3FE39001, 0x3FE3B000, 0x3FE3D001, 0x3FE3F000,
+		0x3FE41001, 0x3FE43000, 0x3FE45001, 0x3FE47000, 0x3FE49001, 0x3FE4B000, 0x3FE4D001, 0x3FE4F000,
+		0x3FE51001, 0x3FE53000, 0x3FE55001, 0x3FE57000, 0x3FE59001, 0x3FE5B000, 0x3FE5D001, 0x3FE5F000,
+		0x3FE61001, 0x3FE63000, 0x3FE65001, 0x3FE67000, 0x3FE69001, 0x3FE6B000, 0x3FE6D001, 0x3FE6F000,
+		0x3FE71001, 0x3FE73000, 0x3FE75001, 0x3FE77000, 0x3FE79001, 0x3FE7B000, 0x3FE7D001, 0x3FE7F000,
+		0x3FE81001, 0x3FE83000, 0x3FE85001, 0x3FE87000, 0x3FE89001, 0x3FE8B000, 0x3FE8D001, 0x3FE8F000,
+		0x3FE91001, 0x3FE93000, 0x3FE95001, 0x3FE97000, 0x3FE99001, 0x3FE9B000, 0x3FE9D001, 0x3FE9F000,
+		0x3FEA1001, 0x3FEA3000, 0x3FEA5001, 0x3FEA7000, 0x3FEA9001, 0x3FEAB000, 0x3FEAD001, 0x3FEAF000,
+		0x3FEB1001, 0x3FEB3000, 0x3FEB5001, 0x3FEB7000, 0x3FEB9001, 0x3FEBB000, 0x3FEBD001, 0x3FEBF000,
+		0x3FEC1001, 0x3FEC3000, 0x3FEC5001, 0x3FEC7000, 0x3FEC9001, 0x3FECB000, 0x3FECD001, 0x3FECF000,
+		0x3FED1001, 0x3FED3000, 0x3FED5001, 0x3FED7000, 0x3FED9001, 0x3FEDB000, 0x3FEDD001, 0x3FEDF000,
+		0x3FEE1001, 0x3FEE3000, 0x3FEE5001, 0x3FEE7000, 0x3FEE9001, 0x3FEEB000, 0x3FEED001, 0x3FEEF000,
+		0x3FEF1001, 0x3FEF3000, 0x3FEF5001, 0x3FEF7000, 0x3FEF9001, 0x3FEFB000, 0x3FEFD001, 0x3FEFF000,
+		0x3FF01001, 0x3FF03000, 0x3FF05001, 0x3FF07000, 0x3FF09001, 0x3FF0B000, 0x3FF0D001, 0x3FF0F000,
+		0x3FF11001, 0x3FF13000, 0x3FF15001, 0x3FF17000, 0x3FF19001, 0x3FF1B000, 0x3FF1D001, 0x3FF1F000,
+		0x3FF21001, 0x3FF23000, 0x3FF25001, 0x3FF27000, 0x3FF29001, 0x3FF2B000, 0x3FF2D001, 0x3FF2F000,
+		0x3FF31001, 0x3FF33000, 0x3FF35001, 0x3FF37000, 0x3FF39001, 0x3FF3B000, 0x3FF3D001, 0x3FF3F000,
+		0x3FF41001, 0x3FF43000, 0x3FF45001, 0x3FF47000, 0x3FF49001, 0x3FF4B000, 0x3FF4D001, 0x3FF4F000,
+		0x3FF51001, 0x3FF53000, 0x3FF55001, 0x3FF57000, 0x3FF59001, 0x3FF5B000, 0x3FF5D001, 0x3FF5F000,
+		0x3FF61001, 0x3FF63000, 0x3FF65001, 0x3FF67000, 0x3FF69001, 0x3FF6B000, 0x3FF6D001, 0x3FF6F000,
+		0x3FF71001, 0x3FF73000, 0x3FF75001, 0x3FF77000, 0x3FF79001, 0x3FF7B000, 0x3FF7D001, 0x3FF7F000,
+		0x3FF81001, 0x3FF83000, 0x3FF85001, 0x3FF87000, 0x3FF89001, 0x3FF8B000, 0x3FF8D001, 0x3FF8F000,
+		0x3FF91001, 0x3FF93000, 0x3FF95001, 0x3FF97000, 0x3FF99001, 0x3FF9B000, 0x3FF9D001, 0x3FF9F000,
+		0x3FFA1001, 0x3FFA3000, 0x3FFA5001, 0x3FFA7000, 0x3FFA9001, 0x3FFAB000, 0x3FFAD001, 0x3FFAF000,
+		0x3FFB1001, 0x3FFB3000, 0x3FFB5001, 0x3FFB7000, 0x3FFB9001, 0x3FFBB000, 0x3FFBD001, 0x3FFBF000,
+		0x3FFC1001, 0x3FFC3000, 0x3FFC5001, 0x3FFC7000, 0x3FFC9001, 0x3FFCB000, 0x3FFCD001, 0x3FFCF000,
+		0x3FFD1001, 0x3FFD3000, 0x3FFD5001, 0x3FFD7000, 0x3FFD9001, 0x3FFDB000, 0x3FFDD001, 0x3FFDF000,
+		0x3FFE1001, 0x3FFE3000, 0x3FFE5001, 0x3FFE7000, 0x3FFE9001, 0x3FFEB000, 0x3FFED001, 0x3FFEF000,
+		0x3FFF1001, 0x3FFF3000, 0x3FFF5001, 0x3FFF7000, 0x3FFF9001, 0x3FFFB000, 0x3FFFD001, 0x3FFFF000,
+	};
+
+	const uint32_t denormalizedRanges[1024] = { /* [index] = smallest FP32 bit pattern that round-to-nearest-even converts to an FP16 value above bit pattern index */
+		0x33000001, 0x33C00000, 0x34200001, 0x34600000, 0x34900001, 0x34B00000, 0x34D00001, 0x34F00000,
+		0x35080001, 0x35180000, 0x35280001, 0x35380000, 0x35480001, 0x35580000, 0x35680001, 0x35780000,
+		0x35840001, 0x358C0000, 0x35940001, 0x359C0000, 0x35A40001, 0x35AC0000, 0x35B40001, 0x35BC0000,
+		0x35C40001, 0x35CC0000, 0x35D40001, 0x35DC0000, 0x35E40001, 0x35EC0000, 0x35F40001, 0x35FC0000,
+		0x36020001, 0x36060000, 0x360A0001, 0x360E0000, 0x36120001, 0x36160000, 0x361A0001, 0x361E0000,
+		0x36220001, 0x36260000, 0x362A0001, 0x362E0000, 0x36320001, 0x36360000, 0x363A0001, 0x363E0000,
+		0x36420001, 0x36460000, 0x364A0001, 0x364E0000, 0x36520001, 0x36560000, 0x365A0001, 0x365E0000,
+		0x36620001, 0x36660000, 0x366A0001, 0x366E0000, 0x36720001, 0x36760000, 0x367A0001, 0x367E0000,
+		0x36810001, 0x36830000, 0x36850001, 0x36870000, 0x36890001, 0x368B0000, 0x368D0001, 0x368F0000,
+		0x36910001, 0x36930000, 0x36950001, 0x36970000, 0x36990001, 0x369B0000, 0x369D0001, 0x369F0000,
+		0x36A10001, 0x36A30000, 0x36A50001, 0x36A70000, 0x36A90001, 0x36AB0000, 0x36AD0001, 0x36AF0000,
+		0x36B10001, 0x36B30000, 0x36B50001, 0x36B70000, 0x36B90001, 0x36BB0000, 0x36BD0001, 0x36BF0000,
+		0x36C10001, 0x36C30000, 0x36C50001, 0x36C70000, 0x36C90001, 0x36CB0000, 0x36CD0001, 0x36CF0000,
+		0x36D10001, 0x36D30000, 0x36D50001, 0x36D70000, 0x36D90001, 0x36DB0000, 0x36DD0001, 0x36DF0000,
+		0x36E10001, 0x36E30000, 0x36E50001, 0x36E70000, 0x36E90001, 0x36EB0000, 0x36ED0001, 0x36EF0000,
+		0x36F10001, 0x36F30000, 0x36F50001, 0x36F70000, 0x36F90001, 0x36FB0000, 0x36FD0001, 0x36FF0000,
+		0x37008001, 0x37018000, 0x37028001, 0x37038000, 0x37048001, 0x37058000, 0x37068001, 0x37078000,
+		0x37088001, 0x37098000, 0x370A8001, 0x370B8000, 0x370C8001, 0x370D8000, 0x370E8001, 0x370F8000,
+		0x37108001, 0x37118000, 0x37128001, 0x37138000, 0x37148001, 0x37158000, 0x37168001, 0x37178000,
+		0x37188001, 0x37198000, 0x371A8001, 0x371B8000, 0x371C8001, 0x371D8000, 0x371E8001, 0x371F8000,
+		0x37208001, 0x37218000, 0x37228001, 0x37238000, 0x37248001, 0x37258000, 0x37268001, 0x37278000,
+		0x37288001, 0x37298000, 0x372A8001, 0x372B8000, 0x372C8001, 0x372D8000, 0x372E8001, 0x372F8000,
+		0x37308001, 0x37318000, 0x37328001, 0x37338000, 0x37348001, 0x37358000, 0x37368001, 0x37378000,
+		0x37388001, 0x37398000, 0x373A8001, 0x373B8000, 0x373C8001, 0x373D8000, 0x373E8001, 0x373F8000,
+		0x37408001, 0x37418000, 0x37428001, 0x37438000, 0x37448001, 0x37458000, 0x37468001, 0x37478000,
+		0x37488001, 0x37498000, 0x374A8001, 0x374B8000, 0x374C8001, 0x374D8000, 0x374E8001, 0x374F8000,
+		0x37508001, 0x37518000, 0x37528001, 0x37538000, 0x37548001, 0x37558000, 0x37568001, 0x37578000,
+		0x37588001, 0x37598000, 0x375A8001, 0x375B8000, 0x375C8001, 0x375D8000, 0x375E8001, 0x375F8000,
+		0x37608001, 0x37618000, 0x37628001, 0x37638000, 0x37648001, 0x37658000, 0x37668001, 0x37678000,
+		0x37688001, 0x37698000, 0x376A8001, 0x376B8000, 0x376C8001, 0x376D8000, 0x376E8001, 0x376F8000,
+		0x37708001, 0x37718000, 0x37728001, 0x37738000, 0x37748001, 0x37758000, 0x37768001, 0x37778000,
+		0x37788001, 0x37798000, 0x377A8001, 0x377B8000, 0x377C8001, 0x377D8000, 0x377E8001, 0x377F8000,
+		0x37804001, 0x3780C000, 0x37814001, 0x3781C000, 0x37824001, 0x3782C000, 0x37834001, 0x3783C000,
+		0x37844001, 0x3784C000, 0x37854001, 0x3785C000, 0x37864001, 0x3786C000, 0x37874001, 0x3787C000,
+		0x37884001, 0x3788C000, 0x37894001, 0x3789C000, 0x378A4001, 0x378AC000, 0x378B4001, 0x378BC000,
+		0x378C4001, 0x378CC000, 0x378D4001, 0x378DC000, 0x378E4001, 0x378EC000, 0x378F4001, 0x378FC000,
+		0x37904001, 0x3790C000, 0x37914001, 0x3791C000, 0x37924001, 0x3792C000, 0x37934001, 0x3793C000,
+		0x37944001, 0x3794C000, 0x37954001, 0x3795C000, 0x37964001, 0x3796C000, 0x37974001, 0x3797C000,
+		0x37984001, 0x3798C000, 0x37994001, 0x3799C000, 0x379A4001, 0x379AC000, 0x379B4001, 0x379BC000,
+		0x379C4001, 0x379CC000, 0x379D4001, 0x379DC000, 0x379E4001, 0x379EC000, 0x379F4001, 0x379FC000,
+		0x37A04001, 0x37A0C000, 0x37A14001, 0x37A1C000, 0x37A24001, 0x37A2C000, 0x37A34001, 0x37A3C000,
+		0x37A44001, 0x37A4C000, 0x37A54001, 0x37A5C000, 0x37A64001, 0x37A6C000, 0x37A74001, 0x37A7C000,
+		0x37A84001, 0x37A8C000, 0x37A94001, 0x37A9C000, 0x37AA4001, 0x37AAC000, 0x37AB4001, 0x37ABC000,
+		0x37AC4001, 0x37ACC000, 0x37AD4001, 0x37ADC000, 0x37AE4001, 0x37AEC000, 0x37AF4001, 0x37AFC000,
+		0x37B04001, 0x37B0C000, 0x37B14001, 0x37B1C000, 0x37B24001, 0x37B2C000, 0x37B34001, 0x37B3C000,
+		0x37B44001, 0x37B4C000, 0x37B54001, 0x37B5C000, 0x37B64001, 0x37B6C000, 0x37B74001, 0x37B7C000,
+		0x37B84001, 0x37B8C000, 0x37B94001, 0x37B9C000, 0x37BA4001, 0x37BAC000, 0x37BB4001, 0x37BBC000,
+		0x37BC4001, 0x37BCC000, 0x37BD4001, 0x37BDC000, 0x37BE4001, 0x37BEC000, 0x37BF4001, 0x37BFC000,
+		0x37C04001, 0x37C0C000, 0x37C14001, 0x37C1C000, 0x37C24001, 0x37C2C000, 0x37C34001, 0x37C3C000,
+		0x37C44001, 0x37C4C000, 0x37C54001, 0x37C5C000, 0x37C64001, 0x37C6C000, 0x37C74001, 0x37C7C000,
+		0x37C84001, 0x37C8C000, 0x37C94001, 0x37C9C000, 0x37CA4001, 0x37CAC000, 0x37CB4001, 0x37CBC000,
+		0x37CC4001, 0x37CCC000, 0x37CD4001, 0x37CDC000, 0x37CE4001, 0x37CEC000, 0x37CF4001, 0x37CFC000,
+		0x37D04001, 0x37D0C000, 0x37D14001, 0x37D1C000, 0x37D24001, 0x37D2C000, 0x37D34001, 0x37D3C000,
+		0x37D44001, 0x37D4C000, 0x37D54001, 0x37D5C000, 0x37D64001, 0x37D6C000, 0x37D74001, 0x37D7C000,
+		0x37D84001, 0x37D8C000, 0x37D94001, 0x37D9C000, 0x37DA4001, 0x37DAC000, 0x37DB4001, 0x37DBC000,
+		0x37DC4001, 0x37DCC000, 0x37DD4001, 0x37DDC000, 0x37DE4001, 0x37DEC000, 0x37DF4001, 0x37DFC000,
+		0x37E04001, 0x37E0C000, 0x37E14001, 0x37E1C000, 0x37E24001, 0x37E2C000, 0x37E34001, 0x37E3C000,
+		0x37E44001, 0x37E4C000, 0x37E54001, 0x37E5C000, 0x37E64001, 0x37E6C000, 0x37E74001, 0x37E7C000,
+		0x37E84001, 0x37E8C000, 0x37E94001, 0x37E9C000, 0x37EA4001, 0x37EAC000, 0x37EB4001, 0x37EBC000,
+		0x37EC4001, 0x37ECC000, 0x37ED4001, 0x37EDC000, 0x37EE4001, 0x37EEC000, 0x37EF4001, 0x37EFC000,
+		0x37F04001, 0x37F0C000, 0x37F14001, 0x37F1C000, 0x37F24001, 0x37F2C000, 0x37F34001, 0x37F3C000,
+		0x37F44001, 0x37F4C000, 0x37F54001, 0x37F5C000, 0x37F64001, 0x37F6C000, 0x37F74001, 0x37F7C000,
+		0x37F84001, 0x37F8C000, 0x37F94001, 0x37F9C000, 0x37FA4001, 0x37FAC000, 0x37FB4001, 0x37FBC000,
+		0x37FC4001, 0x37FCC000, 0x37FD4001, 0x37FDC000, 0x37FE4001, 0x37FEC000, 0x37FF4001, 0x37FFC000,
+		0x38002001, 0x38006000, 0x3800A001, 0x3800E000, 0x38012001, 0x38016000, 0x3801A001, 0x3801E000,
+		0x38022001, 0x38026000, 0x3802A001, 0x3802E000, 0x38032001, 0x38036000, 0x3803A001, 0x3803E000,
+		0x38042001, 0x38046000, 0x3804A001, 0x3804E000, 0x38052001, 0x38056000, 0x3805A001, 0x3805E000,
+		0x38062001, 0x38066000, 0x3806A001, 0x3806E000, 0x38072001, 0x38076000, 0x3807A001, 0x3807E000,
+		0x38082001, 0x38086000, 0x3808A001, 0x3808E000, 0x38092001, 0x38096000, 0x3809A001, 0x3809E000,
+		0x380A2001, 0x380A6000, 0x380AA001, 0x380AE000, 0x380B2001, 0x380B6000, 0x380BA001, 0x380BE000,
+		0x380C2001, 0x380C6000, 0x380CA001, 0x380CE000, 0x380D2001, 0x380D6000, 0x380DA001, 0x380DE000,
+		0x380E2001, 0x380E6000, 0x380EA001, 0x380EE000, 0x380F2001, 0x380F6000, 0x380FA001, 0x380FE000,
+		0x38102001, 0x38106000, 0x3810A001, 0x3810E000, 0x38112001, 0x38116000, 0x3811A001, 0x3811E000,
+		0x38122001, 0x38126000, 0x3812A001, 0x3812E000, 0x38132001, 0x38136000, 0x3813A001, 0x3813E000,
+		0x38142001, 0x38146000, 0x3814A001, 0x3814E000, 0x38152001, 0x38156000, 0x3815A001, 0x3815E000,
+		0x38162001, 0x38166000, 0x3816A001, 0x3816E000, 0x38172001, 0x38176000, 0x3817A001, 0x3817E000,
+		0x38182001, 0x38186000, 0x3818A001, 0x3818E000, 0x38192001, 0x38196000, 0x3819A001, 0x3819E000,
+		0x381A2001, 0x381A6000, 0x381AA001, 0x381AE000, 0x381B2001, 0x381B6000, 0x381BA001, 0x381BE000,
+		0x381C2001, 0x381C6000, 0x381CA001, 0x381CE000, 0x381D2001, 0x381D6000, 0x381DA001, 0x381DE000,
+		0x381E2001, 0x381E6000, 0x381EA001, 0x381EE000, 0x381F2001, 0x381F6000, 0x381FA001, 0x381FE000,
+		0x38202001, 0x38206000, 0x3820A001, 0x3820E000, 0x38212001, 0x38216000, 0x3821A001, 0x3821E000,
+		0x38222001, 0x38226000, 0x3822A001, 0x3822E000, 0x38232001, 0x38236000, 0x3823A001, 0x3823E000,
+		0x38242001, 0x38246000, 0x3824A001, 0x3824E000, 0x38252001, 0x38256000, 0x3825A001, 0x3825E000,
+		0x38262001, 0x38266000, 0x3826A001, 0x3826E000, 0x38272001, 0x38276000, 0x3827A001, 0x3827E000,
+		0x38282001, 0x38286000, 0x3828A001, 0x3828E000, 0x38292001, 0x38296000, 0x3829A001, 0x3829E000,
+		0x382A2001, 0x382A6000, 0x382AA001, 0x382AE000, 0x382B2001, 0x382B6000, 0x382BA001, 0x382BE000,
+		0x382C2001, 0x382C6000, 0x382CA001, 0x382CE000, 0x382D2001, 0x382D6000, 0x382DA001, 0x382DE000,
+		0x382E2001, 0x382E6000, 0x382EA001, 0x382EE000, 0x382F2001, 0x382F6000, 0x382FA001, 0x382FE000,
+		0x38302001, 0x38306000, 0x3830A001, 0x3830E000, 0x38312001, 0x38316000, 0x3831A001, 0x3831E000,
+		0x38322001, 0x38326000, 0x3832A001, 0x3832E000, 0x38332001, 0x38336000, 0x3833A001, 0x3833E000,
+		0x38342001, 0x38346000, 0x3834A001, 0x3834E000, 0x38352001, 0x38356000, 0x3835A001, 0x3835E000,
+		0x38362001, 0x38366000, 0x3836A001, 0x3836E000, 0x38372001, 0x38376000, 0x3837A001, 0x3837E000,
+		0x38382001, 0x38386000, 0x3838A001, 0x3838E000, 0x38392001, 0x38396000, 0x3839A001, 0x3839E000,
+		0x383A2001, 0x383A6000, 0x383AA001, 0x383AE000, 0x383B2001, 0x383B6000, 0x383BA001, 0x383BE000,
+		0x383C2001, 0x383C6000, 0x383CA001, 0x383CE000, 0x383D2001, 0x383D6000, 0x383DA001, 0x383DE000,
+		0x383E2001, 0x383E6000, 0x383EA001, 0x383EE000, 0x383F2001, 0x383F6000, 0x383FA001, 0x383FE000,
+		0x38402001, 0x38406000, 0x3840A001, 0x3840E000, 0x38412001, 0x38416000, 0x3841A001, 0x3841E000,
+		0x38422001, 0x38426000, 0x3842A001, 0x3842E000, 0x38432001, 0x38436000, 0x3843A001, 0x3843E000,
+		0x38442001, 0x38446000, 0x3844A001, 0x3844E000, 0x38452001, 0x38456000, 0x3845A001, 0x3845E000,
+		0x38462001, 0x38466000, 0x3846A001, 0x3846E000, 0x38472001, 0x38476000, 0x3847A001, 0x3847E000,
+		0x38482001, 0x38486000, 0x3848A001, 0x3848E000, 0x38492001, 0x38496000, 0x3849A001, 0x3849E000,
+		0x384A2001, 0x384A6000, 0x384AA001, 0x384AE000, 0x384B2001, 0x384B6000, 0x384BA001, 0x384BE000,
+		0x384C2001, 0x384C6000, 0x384CA001, 0x384CE000, 0x384D2001, 0x384D6000, 0x384DA001, 0x384DE000,
+		0x384E2001, 0x384E6000, 0x384EA001, 0x384EE000, 0x384F2001, 0x384F6000, 0x384FA001, 0x384FE000,
+		0x38502001, 0x38506000, 0x3850A001, 0x3850E000, 0x38512001, 0x38516000, 0x3851A001, 0x3851E000,
+		0x38522001, 0x38526000, 0x3852A001, 0x3852E000, 0x38532001, 0x38536000, 0x3853A001, 0x3853E000,
+		0x38542001, 0x38546000, 0x3854A001, 0x3854E000, 0x38552001, 0x38556000, 0x3855A001, 0x3855E000,
+		0x38562001, 0x38566000, 0x3856A001, 0x3856E000, 0x38572001, 0x38576000, 0x3857A001, 0x3857E000,
+		0x38582001, 0x38586000, 0x3858A001, 0x3858E000, 0x38592001, 0x38596000, 0x3859A001, 0x3859E000,
+		0x385A2001, 0x385A6000, 0x385AA001, 0x385AE000, 0x385B2001, 0x385B6000, 0x385BA001, 0x385BE000,
+		0x385C2001, 0x385C6000, 0x385CA001, 0x385CE000, 0x385D2001, 0x385D6000, 0x385DA001, 0x385DE000,
+		0x385E2001, 0x385E6000, 0x385EA001, 0x385EE000, 0x385F2001, 0x385F6000, 0x385FA001, 0x385FE000,
+		0x38602001, 0x38606000, 0x3860A001, 0x3860E000, 0x38612001, 0x38616000, 0x3861A001, 0x3861E000,
+		0x38622001, 0x38626000, 0x3862A001, 0x3862E000, 0x38632001, 0x38636000, 0x3863A001, 0x3863E000,
+		0x38642001, 0x38646000, 0x3864A001, 0x3864E000, 0x38652001, 0x38656000, 0x3865A001, 0x3865E000,
+		0x38662001, 0x38666000, 0x3866A001, 0x3866E000, 0x38672001, 0x38676000, 0x3867A001, 0x3867E000,
+		0x38682001, 0x38686000, 0x3868A001, 0x3868E000, 0x38692001, 0x38696000, 0x3869A001, 0x3869E000,
+		0x386A2001, 0x386A6000, 0x386AA001, 0x386AE000, 0x386B2001, 0x386B6000, 0x386BA001, 0x386BE000,
+		0x386C2001, 0x386C6000, 0x386CA001, 0x386CE000, 0x386D2001, 0x386D6000, 0x386DA001, 0x386DE000,
+		0x386E2001, 0x386E6000, 0x386EA001, 0x386EE000, 0x386F2001, 0x386F6000, 0x386FA001, 0x386FE000,
+		0x38702001, 0x38706000, 0x3870A001, 0x3870E000, 0x38712001, 0x38716000, 0x3871A001, 0x3871E000,
+		0x38722001, 0x38726000, 0x3872A001, 0x3872E000, 0x38732001, 0x38736000, 0x3873A001, 0x3873E000,
+		0x38742001, 0x38746000, 0x3874A001, 0x3874E000, 0x38752001, 0x38756000, 0x3875A001, 0x3875E000,
+		0x38762001, 0x38766000, 0x3876A001, 0x3876E000, 0x38772001, 0x38776000, 0x3877A001, 0x3877E000,
+		0x38782001, 0x38786000, 0x3878A001, 0x3878E000, 0x38792001, 0x38796000, 0x3879A001, 0x3879E000,
+		0x387A2001, 0x387A6000, 0x387AA001, 0x387AE000, 0x387B2001, 0x387B6000, 0x387BA001, 0x387BE000,
+		0x387C2001, 0x387C6000, 0x387CA001, 0x387CE000, 0x387D2001, 0x387D6000, 0x387DA001, 0x387DE000,
+		0x387E2001, 0x387E6000, 0x387EA001, 0x387EE000, 0x387F2001, 0x387F6000, 0x387FA001, 0x387FE000,
+	};
+}
diff --git a/test/tables.h b/test/tables.h
new file mode 100644
index 0000000..942c542
--- /dev/null
+++ b/test/tables.h
@@ -0,0 +1,13 @@
+#include <cstdint>
+
+
+namespace fp16 { /* Reference lookup tables shared by the FP16 conversion tests; values are raw uint32_t FP32 bit patterns */
+	/* FP32 conversion results for the 1024 FP16 numbers in range [1.0h, 2.0h) */
+	extern const uint32_t normalizedValues[1024];
+	/* FP32 conversion results for FP16 numbers in range [0.0h, HALF_MIN) */
+	extern const uint32_t denormalizedValues[1024];
+	/* FP32 numbers such that FP16(fp32) <= as_half(as_uint16(1.0h) | index) for fp32 < normalizedRanges[index]; at normalizedRanges[index] the result rounds (ties to even) to the next FP16 number */
+	extern const uint32_t normalizedRanges[1024];
+	/* FP32 numbers such that FP16(fp32) <= as_half(index) for fp32 < denormalizedRanges[index]; at denormalizedRanges[index] the result rounds (ties to even) to as_half(index + 1) */
+	extern const uint32_t denormalizedRanges[1024];
+} /* namespace fp16 */