2x faster FP32->IEEE FP16 conversion
Additionally, the conversion now correctly handles NaNs on PNaCl and
Emscripten/Asm.js.
diff --git a/include/fp16/fp16.h b/include/fp16/fp16.h
index ab10fec..6fde4ca 100644
--- a/include/fp16/fp16.h
+++ b/include/fp16/fp16.h
@@ -207,24 +207,24 @@
* floating-point operations and bitcasts between integer and floating-point variables.
*/
static inline uint16_t fp16_ieee_from_fp32_value(float f) {
- float base = fabsf(f);
const float scale_to_inf = 0x1.0p+112f;
- base *= scale_to_inf;
- const float scale_to_zero = 0x1.0p-112f * 0x1.0p+2f;
- base *= scale_to_zero;
- if (!(base == base)) {
- base = nanf("0x200");
+ const float scale_to_zero = 0x1.0p-110f;
+ float base = (fabsf(f) * scale_to_inf) * scale_to_zero;
+
+ const uint32_t w = fp32_to_bits(f);
+ const uint32_t shl1_w = w + w;
+ const uint32_t sign = w & UINT32_C(0x80000000);
+ uint32_t bias = shl1_w & UINT32_C(0xFF000000);
+ if (bias < UINT32_C(0x71000000)) {
+ bias = UINT32_C(0x71000000);
}
- const uint32_t sign = fp32_to_bits(f) & UINT32_C(0x80000000);
- float bias = f * (0x1.0p+23f * 0x1.0p-10f * 0x1.0p+2f);
- bias = fp32_from_bits(fp32_to_bits(bias) & UINT32_C(0x7F800000));
- if (bias < (0x1p-1f * 0x1.0p+2f)) {
- bias = (0x1p-1f * 0x1.0p+2f);
- }
- bias += base;
- const uint32_t exp_f = fp32_to_bits(bias) >> 13;// - (((0x7F - 0xF) + (23 - 10 + 1 + 2)) << 10);
- return (sign >> 16) | ((exp_f & UINT32_C(0x00007C00)) + (fp32_to_bits(bias) & UINT32_C(0x00000FFF)));
+ base = fp32_from_bits((bias >> 1) + UINT32_C(0x07800000)) + base;
+ const uint32_t bits = fp32_to_bits(base);
+ const uint32_t exp_bits = (bits >> 13) & UINT32_C(0x00007C00);
+ const uint32_t mantissa_bits = bits & UINT32_C(0x00000FFF);
+ const uint32_t nonsign = exp_bits + mantissa_bits;
+ return (sign >> 16) | (shl1_w > UINT32_C(0xFF000000) ? UINT16_C(0x7E00) : nonsign);
}
/*