blob: c98f6dc6d7613c0baa32b3514d703f7d52beea7d [file] [log] [blame]
#include <c10/util/Float8_e5m2fnuz.h>
#include <array>
#include <iostream>
namespace c10 {
namespace detail {
/// Decodes a raw 8-bit Float8_e5m2fnuz encoding into the float it represents.
///
/// The conversion is a plain table lookup: `input` is used directly as the
/// index into a 256-entry table of precomputed floats.
///
/// Table layout (as listed below):
///  - 0x00 is 0.0f and 0x01..0x7F are the positive values in ascending order.
///  - 0x80 is a signaling NaN; there is no negative-zero entry.
///  - 0x81..0xFF are the negations of 0x01..0x7F, in the same order.
///
/// @param input Raw encoded byte; every value 0..255 has a table entry.
/// @return The table entry at index `input`.
C10_HOST_DEVICE float fp8e5m2fnuz_to_fp32_value(uint8_t input) {
  // Four entries per row; the leading comment gives the index of the row's
  // first entry. From the second row on, each row is one power of two times
  // {1, 1.25, 1.5, 1.75}; the first row is {0, 1, 2, 3} * 2^-17.
  // clang-format off
  constexpr std::array<float, 256> decoded_values = {
      /* 0x00 */ 0.0f, 7.62939453125e-06f, 1.52587890625e-05f, 2.288818359375e-05f,
      /* 0x04 */ 3.0517578125e-05f, 3.814697265625e-05f, 4.57763671875e-05f, 5.340576171875e-05f,
      /* 0x08 */ 6.103515625e-05f, 7.62939453125e-05f, 9.1552734375e-05f, 0.0001068115234375f,
      /* 0x0C */ 0.0001220703125f, 0.000152587890625f, 0.00018310546875f, 0.000213623046875f,
      /* 0x10 */ 0.000244140625f, 0.00030517578125f, 0.0003662109375f, 0.00042724609375f,
      /* 0x14 */ 0.00048828125f, 0.0006103515625f, 0.000732421875f, 0.0008544921875f,
      /* 0x18 */ 0.0009765625f, 0.001220703125f, 0.00146484375f, 0.001708984375f,
      /* 0x1C */ 0.001953125f, 0.00244140625f, 0.0029296875f, 0.00341796875f,
      /* 0x20 */ 0.00390625f, 0.0048828125f, 0.005859375f, 0.0068359375f,
      /* 0x24 */ 0.0078125f, 0.009765625f, 0.01171875f, 0.013671875f,
      /* 0x28 */ 0.015625f, 0.01953125f, 0.0234375f, 0.02734375f,
      /* 0x2C */ 0.03125f, 0.0390625f, 0.046875f, 0.0546875f,
      /* 0x30 */ 0.0625f, 0.078125f, 0.09375f, 0.109375f,
      /* 0x34 */ 0.125f, 0.15625f, 0.1875f, 0.21875f,
      /* 0x38 */ 0.25f, 0.3125f, 0.375f, 0.4375f,
      /* 0x3C */ 0.5f, 0.625f, 0.75f, 0.875f,
      /* 0x40 */ 1.0f, 1.25f, 1.5f, 1.75f,
      /* 0x44 */ 2.0f, 2.5f, 3.0f, 3.5f,
      /* 0x48 */ 4.0f, 5.0f, 6.0f, 7.0f,
      /* 0x4C */ 8.0f, 10.0f, 12.0f, 14.0f,
      /* 0x50 */ 16.0f, 20.0f, 24.0f, 28.0f,
      /* 0x54 */ 32.0f, 40.0f, 48.0f, 56.0f,
      /* 0x58 */ 64.0f, 80.0f, 96.0f, 112.0f,
      /* 0x5C */ 128.0f, 160.0f, 192.0f, 224.0f,
      /* 0x60 */ 256.0f, 320.0f, 384.0f, 448.0f,
      /* 0x64 */ 512.0f, 640.0f, 768.0f, 896.0f,
      /* 0x68 */ 1024.0f, 1280.0f, 1536.0f, 1792.0f,
      /* 0x6C */ 2048.0f, 2560.0f, 3072.0f, 3584.0f,
      /* 0x70 */ 4096.0f, 5120.0f, 6144.0f, 7168.0f,
      /* 0x74 */ 8192.0f, 10240.0f, 12288.0f, 14336.0f,
      /* 0x78 */ 16384.0f, 20480.0f, 24576.0f, 28672.0f,
      /* 0x7C */ 32768.0f, 40960.0f, 49152.0f, 57344.0f,
      // Index 0x80 (the slot that would otherwise be negative zero) is NaN.
      /* 0x80 */ std::numeric_limits<float>::signaling_NaN(), -7.62939453125e-06f, -1.52587890625e-05f, -2.288818359375e-05f,
      /* 0x84 */ -3.0517578125e-05f, -3.814697265625e-05f, -4.57763671875e-05f, -5.340576171875e-05f,
      /* 0x88 */ -6.103515625e-05f, -7.62939453125e-05f, -9.1552734375e-05f, -0.0001068115234375f,
      /* 0x8C */ -0.0001220703125f, -0.000152587890625f, -0.00018310546875f, -0.000213623046875f,
      /* 0x90 */ -0.000244140625f, -0.00030517578125f, -0.0003662109375f, -0.00042724609375f,
      /* 0x94 */ -0.00048828125f, -0.0006103515625f, -0.000732421875f, -0.0008544921875f,
      /* 0x98 */ -0.0009765625f, -0.001220703125f, -0.00146484375f, -0.001708984375f,
      /* 0x9C */ -0.001953125f, -0.00244140625f, -0.0029296875f, -0.00341796875f,
      /* 0xA0 */ -0.00390625f, -0.0048828125f, -0.005859375f, -0.0068359375f,
      /* 0xA4 */ -0.0078125f, -0.009765625f, -0.01171875f, -0.013671875f,
      /* 0xA8 */ -0.015625f, -0.01953125f, -0.0234375f, -0.02734375f,
      /* 0xAC */ -0.03125f, -0.0390625f, -0.046875f, -0.0546875f,
      /* 0xB0 */ -0.0625f, -0.078125f, -0.09375f, -0.109375f,
      /* 0xB4 */ -0.125f, -0.15625f, -0.1875f, -0.21875f,
      /* 0xB8 */ -0.25f, -0.3125f, -0.375f, -0.4375f,
      /* 0xBC */ -0.5f, -0.625f, -0.75f, -0.875f,
      /* 0xC0 */ -1.0f, -1.25f, -1.5f, -1.75f,
      /* 0xC4 */ -2.0f, -2.5f, -3.0f, -3.5f,
      /* 0xC8 */ -4.0f, -5.0f, -6.0f, -7.0f,
      /* 0xCC */ -8.0f, -10.0f, -12.0f, -14.0f,
      /* 0xD0 */ -16.0f, -20.0f, -24.0f, -28.0f,
      /* 0xD4 */ -32.0f, -40.0f, -48.0f, -56.0f,
      /* 0xD8 */ -64.0f, -80.0f, -96.0f, -112.0f,
      /* 0xDC */ -128.0f, -160.0f, -192.0f, -224.0f,
      /* 0xE0 */ -256.0f, -320.0f, -384.0f, -448.0f,
      /* 0xE4 */ -512.0f, -640.0f, -768.0f, -896.0f,
      /* 0xE8 */ -1024.0f, -1280.0f, -1536.0f, -1792.0f,
      /* 0xEC */ -2048.0f, -2560.0f, -3072.0f, -3584.0f,
      /* 0xF0 */ -4096.0f, -5120.0f, -6144.0f, -7168.0f,
      /* 0xF4 */ -8192.0f, -10240.0f, -12288.0f, -14336.0f,
      /* 0xF8 */ -16384.0f, -20480.0f, -24576.0f, -28672.0f,
      /* 0xFC */ -32768.0f, -40960.0f, -49152.0f, -57344.0f,
  };
  // clang-format on

  // uint8_t spans exactly 0..255, so the index is always inside the table.
  return decoded_values[input];
}
} // namespace detail
// Compile-time guard: fails the build if Float8_e5m2fnuz ever stops being a
// standard-layout type. The message names the type that is actually checked
// (it previously referred to c10::Float8_e5m2, a different type).
static_assert(
    std::is_standard_layout_v<Float8_e5m2fnuz>,
    "c10::Float8_e5m2fnuz must be standard layout.");
/// Streams a Float8_e5m2fnuz by converting it to float and inserting that
/// float into `out`, so formatting follows the stream's float settings.
///
/// @param out   Destination stream.
/// @param value Value to print.
/// @return `out`, to allow chained insertions.
std::ostream& operator<<(std::ostream& out, const Float8_e5m2fnuz& value) {
  return out << static_cast<float>(value);
}
} // namespace c10