| // Translated from C to Rust. The original C code can be found at |
| // https://github.com/ulfjack/ryu and carries the following license: |
| // |
| // Copyright 2018 Ulf Adams |
| // |
| // The contents of this file may be used under the terms of the Apache License, |
| // Version 2.0. |
| // |
| // (See accompanying file LICENSE-Apache or copy at |
| // http://www.apache.org/licenses/LICENSE-2.0) |
| // |
| // Alternatively, the contents of this file may be used under the terms of |
| // the Boost Software License, Version 1.0. |
| // (See accompanying file LICENSE-Boost or copy at |
| // https://www.boost.org/LICENSE_1_0.txt) |
| // |
| // Unless required by applicable law or agreed to in writing, this software |
| // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. |
| |
| use core::{mem, ptr}; |
| |
| use common::*; |
| #[cfg(not(feature = "small"))] |
| use d2s_full_table::*; |
| #[cfg(feature = "small")] |
| use d2s_small_table::*; |
| use digit_table::*; |
| use d2s_intrinsics::*; |
| |
| #[cfg(feature = "no-panic")] |
| use no_panic::no_panic; |
| |
| pub const DOUBLE_MANTISSA_BITS: u32 = 52; |
| pub const DOUBLE_EXPONENT_BITS: u32 = 11; |
| |
| const DOUBLE_POW5_INV_BITCOUNT: i32 = 122; |
| const DOUBLE_POW5_BITCOUNT: i32 = 121; |
| |
| #[cfg_attr(feature = "no-panic", inline)] |
| fn pow5_factor(mut value: u64) -> u32 { |
| let mut count = 0u32; |
| loop { |
| debug_assert!(value != 0); |
| let q = div5(value); |
| let r = (value - 5 * q) as u32; |
| if r != 0 { |
| break; |
| } |
| value = q; |
| count += 1; |
| } |
| count |
| } |
| |
| // Returns true if value is divisible by 5^p. |
| #[cfg_attr(feature = "no-panic", inline)] |
| fn multiple_of_power_of_5(value: u64, p: u32) -> bool { |
| // I tried a case distinction on p, but there was no performance difference. |
| pow5_factor(value) >= p |
| } |
| |
| // Returns true if value is divisible by 2^p. |
| #[cfg_attr(feature = "no-panic", inline)] |
| fn multiple_of_power_of_2(value: u64, p: u32) -> bool { |
| // return __builtin_ctzll(value) >= p; |
| (value & ((1u64 << p) - 1)) == 0 |
| } |
| |
| #[cfg(integer128)] |
| #[cfg_attr(feature = "no-panic", inline)] |
| fn mul_shift(m: u64, mul: &(u64, u64), j: u32) -> u64 { |
| let b0 = m as u128 * mul.0 as u128; |
| let b2 = m as u128 * mul.1 as u128; |
| (((b0 >> 64) + b2) >> (j - 64)) as u64 |
| } |
| |
| #[cfg(integer128)] |
| #[cfg_attr(feature = "no-panic", inline)] |
| fn mul_shift_all( |
| m: u64, |
| mul: &(u64, u64), |
| j: u32, |
| vp: &mut u64, |
| vm: &mut u64, |
| mm_shift: u32, |
| ) -> u64 { |
| *vp = mul_shift(4 * m + 2, mul, j); |
| *vm = mul_shift(4 * m - 1 - mm_shift as u64, mul, j); |
| mul_shift(4 * m, mul, j) |
| } |
| |
| #[cfg(not(integer128))] |
| #[cfg_attr(feature = "no-panic", inline)] |
| fn mul_shift_all( |
| mut m: u64, |
| mul: &(u64, u64), |
| j: u32, |
| vp: &mut u64, |
| vm: &mut u64, |
| mm_shift: u32, |
| ) -> u64 { |
| m <<= 1; |
| // m is maximum 55 bits |
| let (lo, tmp) = umul128(m, mul.0); |
| let (mut mid, mut hi) = umul128(m, mul.1); |
| mid = mid.wrapping_add(tmp); |
| hi = hi.wrapping_add((mid < tmp) as u64); // overflow into hi |
| |
| let lo2 = lo.wrapping_add(mul.0); |
| let mid2 = mid.wrapping_add(mul.1).wrapping_add((lo2 < lo) as u64); |
| let hi2 = hi.wrapping_add((mid2 < mid) as u64); |
| *vp = shiftright128(mid2, hi2, j - 64 - 1); |
| |
| if mm_shift == 1 { |
| let lo3 = lo.wrapping_sub(mul.0); |
| let mid3 = mid.wrapping_sub(mul.1).wrapping_sub((lo3 > lo) as u64); |
| let hi3 = hi.wrapping_sub((mid3 > mid) as u64); |
| *vm = shiftright128(mid3, hi3, j - 64 - 1); |
| } else { |
| let lo3 = lo + lo; |
| let mid3 = mid.wrapping_add(mid).wrapping_add((lo3 < lo) as u64); |
| let hi3 = hi.wrapping_add(hi).wrapping_add((mid3 < mid) as u64); |
| let lo4 = lo3.wrapping_sub(mul.0); |
| let mid4 = mid3.wrapping_sub(mul.1).wrapping_sub((lo4 > lo3) as u64); |
| let hi4 = hi3.wrapping_sub((mid4 > mid3) as u64); |
| *vm = shiftright128(mid4, hi4, j - 64); |
| } |
| |
| shiftright128(mid, hi, j - 64 - 1) |
| } |
| |
| #[cfg_attr(feature = "no-panic", inline)] |
| pub fn decimal_length(v: u64) -> u32 { |
| // This is slightly faster than a loop. |
| // The average output length is 16.38 digits, so we check high-to-low. |
| // Function precondition: v is not an 18, 19, or 20-digit number. |
| // (17 digits are sufficient for round-tripping.) |
| debug_assert!(v < 100000000000000000); |
| |
| if v >= 10000000000000000 { |
| 17 |
| } else if v >= 1000000000000000 { |
| 16 |
| } else if v >= 100000000000000 { |
| 15 |
| } else if v >= 10000000000000 { |
| 14 |
| } else if v >= 1000000000000 { |
| 13 |
| } else if v >= 100000000000 { |
| 12 |
| } else if v >= 10000000000 { |
| 11 |
| } else if v >= 1000000000 { |
| 10 |
| } else if v >= 100000000 { |
| 9 |
| } else if v >= 10000000 { |
| 8 |
| } else if v >= 1000000 { |
| 7 |
| } else if v >= 100000 { |
| 6 |
| } else if v >= 10000 { |
| 5 |
| } else if v >= 1000 { |
| 4 |
| } else if v >= 100 { |
| 3 |
| } else if v >= 10 { |
| 2 |
| } else { |
| 1 |
| } |
| } |
| |
| // A floating decimal representing m * 10^e. |
| pub struct FloatingDecimal64 { |
| pub mantissa: u64, |
| pub exponent: i32, |
| } |
| |
| #[cfg_attr(feature = "no-panic", inline)] |
| pub fn d2d(ieee_mantissa: u64, ieee_exponent: u32) -> FloatingDecimal64 { |
| let bias = (1u32 << (DOUBLE_EXPONENT_BITS - 1)) - 1; |
| |
| let (e2, m2) = if ieee_exponent == 0 { |
| ( |
| // We subtract 2 so that the bounds computation has 2 additional bits. |
| 1 - bias as i32 - DOUBLE_MANTISSA_BITS as i32 - 2, |
| ieee_mantissa, |
| ) |
| } else { |
| ( |
| ieee_exponent as i32 - bias as i32 - DOUBLE_MANTISSA_BITS as i32 - 2, |
| (1u64 << DOUBLE_MANTISSA_BITS) | ieee_mantissa, |
| ) |
| }; |
| let even = (m2 & 1) == 0; |
| let accept_bounds = even; |
| |
| // Step 2: Determine the interval of legal decimal representations. |
| let mv = 4 * m2; |
| // Implicit bool -> int conversion. True is 1, false is 0. |
| let mm_shift = (ieee_mantissa != 0 || ieee_exponent <= 1) as u32; |
| // We would compute mp and mm like this: |
| // uint64_t mp = 4 * m2 + 2; |
| // uint64_t mm = mv - 1 - mm_shift; |
| |
| // Step 3: Convert to a decimal power base using 128-bit arithmetic. |
| let mut vr: u64; |
| let mut vp: u64 = unsafe { mem::uninitialized() }; |
| let mut vm: u64 = unsafe { mem::uninitialized() }; |
| let e10: i32; |
| let mut vm_is_trailing_zeros = false; |
| let mut vr_is_trailing_zeros = false; |
| if e2 >= 0 { |
| // I tried special-casing q == 0, but there was no effect on performance. |
| // This expression is slightly faster than max(0, log10_pow2(e2) - 1). |
| let q = (log10_pow2(e2) - (e2 > 3) as i32) as u32; |
| e10 = q as i32; |
| let k = DOUBLE_POW5_INV_BITCOUNT + pow5bits(q as i32) as i32 - 1; |
| let i = -e2 + q as i32 + k; |
| vr = mul_shift_all( |
| m2, |
| #[cfg(feature = "small")] |
| unsafe { |
| &compute_inv_pow5(q) |
| }, |
| #[cfg(not(feature = "small"))] |
| unsafe { |
| debug_assert!(q < DOUBLE_POW5_INV_SPLIT.len() as u32); |
| DOUBLE_POW5_INV_SPLIT.get_unchecked(q as usize) |
| }, |
| i as u32, |
| &mut vp, |
| &mut vm, |
| mm_shift, |
| ); |
| if q <= 21 { |
| // This should use q <= 22, but I think 21 is also safe. Smaller values |
| // may still be safe, but it's more difficult to reason about them. |
| // Only one of mp, mv, and mm can be a multiple of 5, if any. |
| let mv_mod5 = (mv - 5 * div5(mv)) as u32; |
| if mv_mod5 == 0 { |
| vr_is_trailing_zeros = multiple_of_power_of_5(mv, q); |
| } else if accept_bounds { |
| // Same as min(e2 + (~mm & 1), pow5_factor(mm)) >= q |
| // <=> e2 + (~mm & 1) >= q && pow5_factor(mm) >= q |
| // <=> true && pow5_factor(mm) >= q, since e2 >= q. |
| vm_is_trailing_zeros = multiple_of_power_of_5(mv - 1 - mm_shift as u64, q); |
| } else { |
| // Same as min(e2 + 1, pow5_factor(mp)) >= q. |
| vp -= multiple_of_power_of_5(mv + 2, q) as u64; |
| } |
| } |
| } else { |
| // This expression is slightly faster than max(0, log10_pow5(-e2) - 1). |
| let q = (log10_pow5(-e2) - (-e2 > 1) as i32) as u32; |
| e10 = q as i32 + e2; |
| let i = -e2 - q as i32; |
| let k = pow5bits(i) as i32 - DOUBLE_POW5_BITCOUNT; |
| let j = q as i32 - k; |
| vr = mul_shift_all( |
| m2, |
| #[cfg(feature = "small")] |
| unsafe { |
| &compute_pow5(i as u32) |
| }, |
| #[cfg(not(feature = "small"))] |
| unsafe { |
| debug_assert!(i < DOUBLE_POW5_SPLIT.len() as i32); |
| DOUBLE_POW5_SPLIT.get_unchecked(i as usize) |
| }, |
| j as u32, |
| &mut vp, |
| &mut vm, |
| mm_shift, |
| ); |
| if q <= 1 { |
| // {vr,vp,vm} is trailing zeros if {mv,mp,mm} has at least q trailing 0 bits. |
| // mv = 4 * m2, so it always has at least two trailing 0 bits. |
| vr_is_trailing_zeros = true; |
| if accept_bounds { |
| // mm = mv - 1 - mm_shift, so it has 1 trailing 0 bit iff mm_shift == 1. |
| vm_is_trailing_zeros = mm_shift == 1; |
| } else { |
| // mp = mv + 2, so it always has at least one trailing 0 bit. |
| vp -= 1; |
| } |
| } else if q < 63 { |
| // TODO(ulfjack): Use a tighter bound here. |
| // We need to compute min(ntz(mv), pow5_factor(mv) - e2) >= q - 1 |
| // <=> ntz(mv) >= q - 1 && pow5_factor(mv) - e2 >= q - 1 |
| // <=> ntz(mv) >= q - 1 (e2 is negative and -e2 >= q) |
| // <=> (mv & ((1 << (q - 1)) - 1)) == 0 |
| // We also need to make sure that the left shift does not overflow. |
| vr_is_trailing_zeros = multiple_of_power_of_2(mv, q - 1); |
| } |
| } |
| |
| // Step 4: Find the shortest decimal representation in the interval of legal representations. |
| let mut removed = 0u32; |
| let mut last_removed_digit = 0u8; |
| // On average, we remove ~2 digits. |
| let output = if vm_is_trailing_zeros || vr_is_trailing_zeros { |
| // General case, which happens rarely (~0.7%). |
| loop { |
| let vp_div10 = div10(vp); |
| let vm_div10 = div10(vm); |
| if vp_div10 <= vm_div10 { |
| break; |
| } |
| let vm_mod10 = (vm - 10 * vm_div10) as u32; |
| let vr_div10 = div10(vr); |
| let vr_mod10 = (vr - 10 * vr_div10) as u32; |
| vm_is_trailing_zeros &= vm_mod10 == 0; |
| vr_is_trailing_zeros &= last_removed_digit == 0; |
| last_removed_digit = vr_mod10 as u8; |
| vr = vr_div10; |
| vp = vp_div10; |
| vm = vm_div10; |
| removed += 1; |
| } |
| if vm_is_trailing_zeros { |
| loop { |
| let vm_div10 = div10(vm); |
| let vm_mod10 = (vm - 10 * vm_div10) as u32; |
| if vm_mod10 != 0 { |
| break; |
| } |
| let vp_div10 = div10(vp); |
| let vr_div10 = div10(vr); |
| let vr_mod10 = (vr - 10 * vr_div10) as u32; |
| vr_is_trailing_zeros &= last_removed_digit == 0; |
| last_removed_digit = vr_mod10 as u8; |
| vr = vr_div10; |
| vp = vp_div10; |
| vm = vm_div10; |
| removed += 1; |
| } |
| } |
| if vr_is_trailing_zeros && last_removed_digit == 5 && vr % 2 == 0 { |
| // Round even if the exact number is .....50..0. |
| last_removed_digit = 4; |
| } |
| // We need to take vr + 1 if vr is outside bounds or we need to round up. |
| vr + ((vr == vm && (!accept_bounds || !vm_is_trailing_zeros)) || last_removed_digit >= 5) |
| as u64 |
| } else { |
| // Specialized for the common case (~99.3%). Percentages below are relative to this. |
| let mut round_up = false; |
| let vp_div100 = div100(vp); |
| let vm_div100 = div100(vm); |
| // Optimization: remove two digits at a time (~86.2%). |
| if vp_div100 > vm_div100 { |
| let vr_div100 = div100(vr); |
| let vr_mod100 = (vr - 100 * vr_div100) as u32; |
| round_up = vr_mod100 >= 50; |
| vr = vr_div100; |
| vp = vp_div100; |
| vm = vm_div100; |
| removed += 2; |
| } |
| // Loop iterations below (approximately), without optimization above: |
| // 0: 0.03%, 1: 13.8%, 2: 70.6%, 3: 14.0%, 4: 1.40%, 5: 0.14%, 6+: 0.02% |
| // Loop iterations below (approximately), with optimization above: |
| // 0: 70.6%, 1: 27.8%, 2: 1.40%, 3: 0.14%, 4+: 0.02% |
| loop { |
| let vp_div10 = div10(vp); |
| let vm_div10 = div10(vm); |
| if vp_div10 <= vm_div10 { |
| break; |
| } |
| let vr_div10 = div10(vr); |
| let vr_mod10 = (vr - 10 * vr_div10) as u32; |
| round_up = vr_mod10 >= 5; |
| vr = vr_div10; |
| vp = vp_div10; |
| vm = vm_div10; |
| removed += 1; |
| } |
| // We need to take vr + 1 if vr is outside bounds or we need to round up. |
| vr + (vr == vm || round_up) as u64 |
| }; |
| let exp = e10 + removed as i32; |
| |
| FloatingDecimal64 { |
| exponent: exp, |
| mantissa: output, |
| } |
| } |
| |
| #[cfg_attr(feature = "no-panic", inline)] |
| unsafe fn to_chars(v: FloatingDecimal64, sign: bool, result: *mut u8) -> usize { |
| // Step 5: Print the decimal representation. |
| let mut index = 0isize; |
| if sign { |
| *result.offset(index) = b'-'; |
| index += 1; |
| } |
| |
| let mut output = v.mantissa; |
| let olength = decimal_length(output); |
| |
| // Print the decimal digits. |
| // The following code is equivalent to: |
| // for (uint32_t i = 0; i < olength - 1; ++i) { |
| // const uint32_t c = output % 10; output /= 10; |
| // result[index + olength - i] = (char) ('0' + c); |
| // } |
| // result[index] = '0' + output % 10; |
| |
| let mut i = 0isize; |
| // We prefer 32-bit operations, even on 64-bit platforms. |
| // We have at most 17 digits, and uint32_t can store 9 digits. |
| // If output doesn't fit into uint32_t, we cut off 8 digits, |
| // so the rest will fit into uint32_t. |
| if (output >> 32) != 0 { |
| // Expensive 64-bit division. |
| let q = div100_000_000(output); |
| let mut output2 = (output - 100_000_000 * q) as u32; |
| output = q; |
| |
| let c = output2 % 10000; |
| output2 /= 10000; |
| let d = output2 % 10000; |
| let c0 = (c % 100) << 1; |
| let c1 = (c / 100) << 1; |
| let d0 = (d % 100) << 1; |
| let d1 = (d / 100) << 1; |
| ptr::copy_nonoverlapping( |
| DIGIT_TABLE.get_unchecked(c0 as usize), |
| result.offset(index + olength as isize - i - 1), |
| 2, |
| ); |
| ptr::copy_nonoverlapping( |
| DIGIT_TABLE.get_unchecked(c1 as usize), |
| result.offset(index + olength as isize - i - 3), |
| 2, |
| ); |
| ptr::copy_nonoverlapping( |
| DIGIT_TABLE.get_unchecked(d0 as usize), |
| result.offset(index + olength as isize - i - 5), |
| 2, |
| ); |
| ptr::copy_nonoverlapping( |
| DIGIT_TABLE.get_unchecked(d1 as usize), |
| result.offset(index + olength as isize - i - 7), |
| 2, |
| ); |
| i += 8; |
| } |
| let mut output2 = output as u32; |
| while output2 >= 10000 { |
| let c = (output2 - 10000 * (output2 / 10000)) as u32; |
| output2 /= 10000; |
| let c0 = (c % 100) << 1; |
| let c1 = (c / 100) << 1; |
| ptr::copy_nonoverlapping( |
| DIGIT_TABLE.get_unchecked(c0 as usize), |
| result.offset(index + olength as isize - i - 1), |
| 2, |
| ); |
| ptr::copy_nonoverlapping( |
| DIGIT_TABLE.get_unchecked(c1 as usize), |
| result.offset(index + olength as isize - i - 3), |
| 2, |
| ); |
| i += 4; |
| } |
| if output2 >= 100 { |
| let c = ((output2 % 100) << 1) as u32; |
| output2 /= 100; |
| ptr::copy_nonoverlapping( |
| DIGIT_TABLE.get_unchecked(c as usize), |
| result.offset(index + olength as isize - i - 1), |
| 2, |
| ); |
| i += 2; |
| } |
| if output2 >= 10 { |
| let c = (output2 << 1) as u32; |
| // We can't use memcpy here: the decimal dot goes between these two digits. |
| *result.offset(index + olength as isize - i) = *DIGIT_TABLE.get_unchecked(c as usize + 1); |
| *result.offset(index) = *DIGIT_TABLE.get_unchecked(c as usize); |
| } else { |
| *result.offset(index) = b'0' + output2 as u8; |
| } |
| |
| // Print decimal point if needed. |
| if olength > 1 { |
| *result.offset(index + 1) = b'.'; |
| index += olength as isize + 1; |
| } else { |
| index += 1; |
| } |
| |
| // Print the exponent. |
| *result.offset(index) = b'E'; |
| index += 1; |
| let mut exp = v.exponent as i32 + olength as i32 - 1; |
| if exp < 0 { |
| *result.offset(index) = b'-'; |
| index += 1; |
| exp = -exp; |
| } |
| |
| if exp >= 100 { |
| let c = exp % 10; |
| ptr::copy_nonoverlapping( |
| DIGIT_TABLE.get_unchecked((2 * (exp / 10)) as usize), |
| result.offset(index), |
| 2, |
| ); |
| *result.offset(index + 2) = b'0' + c as u8; |
| index += 3; |
| } else if exp >= 10 { |
| ptr::copy_nonoverlapping( |
| DIGIT_TABLE.get_unchecked((2 * exp) as usize), |
| result.offset(index), |
| 2, |
| ); |
| index += 2; |
| } else { |
| *result.offset(index) = b'0' + exp as u8; |
| index += 1; |
| } |
| |
| debug_assert!(index <= 24); |
| index as usize |
| } |
| |
| /// Print f64 to the given buffer and return number of bytes written. Ryū's |
| /// original formatting. |
| /// |
| /// At most 24 bytes will be written. |
| /// |
| /// ## Special cases |
| /// |
| /// This function represents any NaN as `NaN`, positive infinity as `Infinity`, |
| /// and negative infinity as `-Infinity`. |
| /// |
| /// ## Safety |
| /// |
| /// The `result` pointer argument must point to sufficiently many writable bytes |
| /// to hold Ryū's representation of `f`. |
| /// |
| /// ## Example |
| /// |
| /// ```rust |
| /// let f = 1.234f64; |
| /// |
| /// unsafe { |
| /// let mut buffer: [u8; 24] = std::mem::uninitialized(); |
| /// let n = ryu::raw::d2s_buffered_n(f, &mut buffer[0]); |
| /// let s = std::str::from_utf8_unchecked(&buffer[..n]); |
| /// assert_eq!(s, "1.234E0"); |
| /// } |
| /// ``` |
| #[cfg_attr(must_use_return, must_use)] |
| #[cfg_attr(feature = "no-panic", no_panic)] |
| pub unsafe fn d2s_buffered_n(f: f64, result: *mut u8) -> usize { |
| // Step 1: Decode the floating-point number, and unify normalized and subnormal cases. |
| let bits = mem::transmute::<f64, u64>(f); |
| |
| // Decode bits into sign, mantissa, and exponent. |
| let ieee_sign = ((bits >> (DOUBLE_MANTISSA_BITS + DOUBLE_EXPONENT_BITS)) & 1) != 0; |
| let ieee_mantissa = bits & ((1u64 << DOUBLE_MANTISSA_BITS) - 1); |
| let ieee_exponent = |
| (bits >> DOUBLE_MANTISSA_BITS) as u32 & ((1u32 << DOUBLE_EXPONENT_BITS) - 1); |
| // Case distinction; exit early for the easy cases. |
| if ieee_exponent == ((1u32 << DOUBLE_EXPONENT_BITS) - 1) |
| || (ieee_exponent == 0 && ieee_mantissa == 0) |
| { |
| return copy_special_str(result, ieee_sign, ieee_exponent != 0, ieee_mantissa != 0); |
| } |
| |
| let v = d2d(ieee_mantissa, ieee_exponent); |
| to_chars(v, ieee_sign, result) |
| } |