| // Adapted from https://github.com/Alexhuszagh/rust-lexical. |
| |
| // FLOAT TYPE |
| |
| use super::num::*; |
| use super::rounding::*; |
| use super::shift::*; |
| |
| /// Extended precision floating-point type. |
| /// |
| /// Private implementation, exposed only for testing purposes. |
| #[doc(hidden)] |
| #[derive(Clone, Copy, Debug, PartialEq, Eq)] |
| pub(crate) struct ExtendedFloat { |
| /// Mantissa for the extended-precision float. |
| pub mant: u64, |
| /// Binary exponent for the extended-precision float. |
| pub exp: i32, |
| } |
| |
| impl ExtendedFloat { |
| // PROPERTIES |
| |
| // OPERATIONS |
| |
| /// Multiply two normalized extended-precision floats, as if by `a*b`. |
| /// |
| /// The precision is maximal when the numbers are normalized, however, |
| /// decent precision will occur as long as both values have high bits |
| /// set. The result is not normalized. |
| /// |
| /// Algorithm: |
| /// 1. Non-signed multiplication of mantissas (requires 2x as many bits as input). |
| /// 2. Normalization of the result (not done here). |
| /// 3. Addition of exponents. |
| pub(crate) fn mul(&self, b: &ExtendedFloat) -> ExtendedFloat { |
| // Logic check, values must be decently normalized prior to multiplication. |
| debug_assert!((self.mant & u64::HIMASK != 0) && (b.mant & u64::HIMASK != 0)); |
| |
| // Extract high-and-low masks. |
| let ah = self.mant >> u64::HALF; |
| let al = self.mant & u64::LOMASK; |
| let bh = b.mant >> u64::HALF; |
| let bl = b.mant & u64::LOMASK; |
| |
| // Get our products |
| let ah_bl = ah * bl; |
| let al_bh = al * bh; |
| let al_bl = al * bl; |
| let ah_bh = ah * bh; |
| |
| let mut tmp = (ah_bl & u64::LOMASK) + (al_bh & u64::LOMASK) + (al_bl >> u64::HALF); |
| // round up |
| tmp += 1 << (u64::HALF - 1); |
| |
| ExtendedFloat { |
| mant: ah_bh + (ah_bl >> u64::HALF) + (al_bh >> u64::HALF) + (tmp >> u64::HALF), |
| exp: self.exp + b.exp + u64::FULL, |
| } |
| } |
| |
| /// Multiply in-place, as if by `a*b`. |
| /// |
| /// The result is not normalized. |
| #[inline] |
| pub(crate) fn imul(&mut self, b: &ExtendedFloat) { |
| *self = self.mul(b); |
| } |
| |
| // NORMALIZE |
| |
| /// Normalize float-point number. |
| /// |
| /// Shift the mantissa so the number of leading zeros is 0, or the value |
| /// itself is 0. |
| /// |
| /// Get the number of bytes shifted. |
| #[inline] |
| pub(crate) fn normalize(&mut self) -> u32 { |
| // Note: |
| // Using the cltz intrinsic via leading_zeros is way faster (~10x) |
| // than shifting 1-bit at a time, via while loop, and also way |
| // faster (~2x) than an unrolled loop that checks at 32, 16, 4, |
| // 2, and 1 bit. |
| // |
| // Using a modulus of pow2 (which will get optimized to a bitwise |
| // and with 0x3F or faster) is slightly slower than an if/then, |
| // however, removing the if/then will likely optimize more branched |
| // code as it removes conditional logic. |
| |
| // Calculate the number of leading zeros, and then zero-out |
| // any overflowing bits, to avoid shl overflow when self.mant == 0. |
| let shift = if self.mant == 0 { |
| 0 |
| } else { |
| self.mant.leading_zeros() |
| }; |
| shl(self, shift as i32); |
| shift |
| } |
| |
| // ROUND |
| |
| /// Lossy round float-point number to native mantissa boundaries. |
| #[inline] |
| pub(crate) fn round_to_native<F, Algorithm>(&mut self, algorithm: Algorithm) |
| where |
| F: Float, |
| Algorithm: FnOnce(&mut ExtendedFloat, i32), |
| { |
| round_to_native::<F, _>(self, algorithm); |
| } |
| |
| // FROM |
| |
| /// Create extended float from native float. |
| #[inline] |
| pub fn from_float<F: Float>(f: F) -> ExtendedFloat { |
| from_float(f) |
| } |
| |
| // INTO |
| |
| /// Convert into default-rounded, lower-precision native float. |
| #[inline] |
| pub(crate) fn into_float<F: Float>(mut self) -> F { |
| self.round_to_native::<F, _>(round_nearest_tie_even); |
| into_float(self) |
| } |
| |
| /// Convert into downward-rounded, lower-precision native float. |
| #[inline] |
| pub(crate) fn into_downward_float<F: Float>(mut self) -> F { |
| self.round_to_native::<F, _>(round_downward); |
| into_float(self) |
| } |
| } |
| |
| // FROM FLOAT |
| |
| // Import ExtendedFloat from native float. |
| #[inline] |
| pub(crate) fn from_float<F>(f: F) -> ExtendedFloat |
| where |
| F: Float, |
| { |
| ExtendedFloat { |
| mant: u64::as_cast(f.mantissa()), |
| exp: f.exponent(), |
| } |
| } |
| |
| // INTO FLOAT |
| |
| // Export extended-precision float to native float. |
| // |
| // The extended-precision float must be in native float representation, |
| // with overflow/underflow appropriately handled. |
| #[inline] |
| pub(crate) fn into_float<F>(fp: ExtendedFloat) -> F |
| where |
| F: Float, |
| { |
| // Export floating-point number. |
| if fp.mant == 0 || fp.exp < F::DENORMAL_EXPONENT { |
| // sub-denormal, underflow |
| F::ZERO |
| } else if fp.exp >= F::MAX_EXPONENT { |
| // overflow |
| F::from_bits(F::INFINITY_BITS) |
| } else { |
| // calculate the exp and fraction bits, and return a float from bits. |
| let exp: u64; |
| if (fp.exp == F::DENORMAL_EXPONENT) && (fp.mant & F::HIDDEN_BIT_MASK.as_u64()) == 0 { |
| exp = 0; |
| } else { |
| exp = (fp.exp + F::EXPONENT_BIAS) as u64; |
| } |
| let exp = exp << F::MANTISSA_SIZE; |
| let mant = fp.mant & F::MANTISSA_MASK.as_u64(); |
| F::from_bits(F::Unsigned::as_cast(mant | exp)) |
| } |
| } |