blob: c777ed81195c75b3b221f2992bf9950c11fa4ff9 [file] [log] [blame]
/* Copyright (C) 2007 Free Software Foundation, Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.
In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file. (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combine
executable.)
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING. If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA. */
#include "bid_internal.h"
/*****************************************************************************
* BID64_round_integral_exact
****************************************************************************/
#if DECIMAL_CALL_BY_REFERENCE
void
bid64_round_integral_exact (UINT64 * pres,
UINT64 *
px _RND_MODE_PARAM _EXC_FLAGS_PARAM
_EXC_MASKS_PARAM _EXC_INFO_PARAM) {
UINT64 x = *px;
#if !DECIMAL_GLOBAL_ROUNDING
unsigned int rnd_mode = *prnd_mode;
#endif
#else
UINT64
bid64_round_integral_exact (UINT64 x _RND_MODE_PARAM _EXC_FLAGS_PARAM
_EXC_MASKS_PARAM _EXC_INFO_PARAM) {
#endif
UINT64 res = 0xbaddbaddbaddbaddull;
UINT64 x_sign;
int exp; // unbiased exponent
// Note: C1 represents the significand (UINT64)
BID_UI64DOUBLE tmp1;
int x_nr_bits;
int q, ind, shift;
UINT64 C1;
// UINT64 res is C* at first - represents up to 16 decimal digits <= 54 bits
UINT128 fstar = { {0x0ull, 0x0ull} };
UINT128 P128;
x_sign = x & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
// check for NaNs and infinities
if ((x & MASK_NAN) == MASK_NAN) { // check for NaN
if ((x & 0x0003ffffffffffffull) > 999999999999999ull)
x = x & 0xfe00000000000000ull; // clear G6-G12 and the payload bits
else
x = x & 0xfe03ffffffffffffull; // clear G6-G12
if ((x & MASK_SNAN) == MASK_SNAN) { // SNaN
// set invalid flag
*pfpsf |= INVALID_EXCEPTION;
// return quiet (SNaN)
res = x & 0xfdffffffffffffffull;
} else { // QNaN
res = x;
}
BID_RETURN (res);
} else if ((x & MASK_INF) == MASK_INF) { // check for Infinity
res = x_sign | 0x7800000000000000ull;
BID_RETURN (res);
}
// unpack x
if ((x & MASK_STEERING_BITS) == MASK_STEERING_BITS) {
// if the steering bits are 11 (condition will be 0), then
// the exponent is G[0:w+1]
exp = ((x & MASK_BINARY_EXPONENT2) >> 51) - 398;
C1 = (x & MASK_BINARY_SIG2) | MASK_BINARY_OR2;
if (C1 > 9999999999999999ull) { // non-canonical
C1 = 0;
}
} else { // if ((x & MASK_STEERING_BITS) != MASK_STEERING_BITS)
exp = ((x & MASK_BINARY_EXPONENT1) >> 53) - 398;
C1 = (x & MASK_BINARY_SIG1);
}
// if x is 0 or non-canonical return 0 preserving the sign bit and
// the preferred exponent of MAX(Q(x), 0)
if (C1 == 0) {
if (exp < 0)
exp = 0;
res = x_sign | (((UINT64) exp + 398) << 53);
BID_RETURN (res);
}
// x is a finite non-zero number (not 0, non-canonical, or special)
switch (rnd_mode) {
case ROUNDING_TO_NEAREST:
case ROUNDING_TIES_AWAY:
// return 0 if (exp <= -(p+1))
if (exp <= -17) {
res = x_sign | 0x31c0000000000000ull;
*pfpsf |= INEXACT_EXCEPTION;
BID_RETURN (res);
}
break;
case ROUNDING_DOWN:
// return 0 if (exp <= -p)
if (exp <= -16) {
if (x_sign) {
res = 0xb1c0000000000001ull;
} else {
res = 0x31c0000000000000ull;
}
*pfpsf |= INEXACT_EXCEPTION;
BID_RETURN (res);
}
break;
case ROUNDING_UP:
// return 0 if (exp <= -p)
if (exp <= -16) {
if (x_sign) {
res = 0xb1c0000000000000ull;
} else {
res = 0x31c0000000000001ull;
}
*pfpsf |= INEXACT_EXCEPTION;
BID_RETURN (res);
}
break;
case ROUNDING_TO_ZERO:
// return 0 if (exp <= -p)
if (exp <= -16) {
res = x_sign | 0x31c0000000000000ull;
*pfpsf |= INEXACT_EXCEPTION;
BID_RETURN (res);
}
break;
} // end switch ()
// q = nr. of decimal digits in x (1 <= q <= 54)
// determine first the nr. of bits in x
if (C1 >= 0x0020000000000000ull) { // x >= 2^53
q = 16;
} else { // if x < 2^53
tmp1.d = (double) C1; // exact conversion
x_nr_bits =
1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
q = nr_digits[x_nr_bits - 1].digits;
if (q == 0) {
q = nr_digits[x_nr_bits - 1].digits1;
if (C1 >= nr_digits[x_nr_bits - 1].threshold_lo)
q++;
}
}
if (exp >= 0) { // -exp <= 0
// the argument is an integer already
res = x;
BID_RETURN (res);
}
switch (rnd_mode) {
case ROUNDING_TO_NEAREST:
if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q
// need to shift right -exp digits from the coefficient; exp will be 0
ind = -exp; // 1 <= ind <= 16; ind is a synonym for 'x'
// chop off ind digits from the lower part of C1
// C1 = C1 + 1/2 * 10^x where the result C1 fits in 64 bits
// FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate
C1 = C1 + midpoint64[ind - 1];
// calculate C* and f*
// C* is actually floor(C*) in this case
// C* and f* need shifting and masking, as shown by
// shiftright128[] and maskhigh128[]
// 1 <= x <= 16
// kx = 10^(-x) = ten2mk64[ind - 1]
// C* = (C1 + 1/2 * 10^x) * 10^(-x)
// the approximation of 10^(-x) was rounded up to 64 bits
__mul_64x64_to_128 (P128, C1, ten2mk64[ind - 1]);
// if (0 < f* < 10^(-x)) then the result is a midpoint
// if floor(C*) is even then C* = floor(C*) - logical right
// shift; C* has p decimal digits, correct by Prop. 1)
// else if floor(C*) is odd C* = floor(C*)-1 (logical right
// shift; C* has p decimal digits, correct by Pr. 1)
// else
// C* = floor(C*) (logical right shift; C has p decimal digits,
// correct by Property 1)
// n = C* * 10^(e+x)
if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
res = P128.w[1];
fstar.w[1] = 0;
fstar.w[0] = P128.w[0];
} else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
shift = shiftright128[ind - 1]; // 3 <= shift <= 63
res = (P128.w[1] >> shift);
fstar.w[1] = P128.w[1] & maskhigh128[ind - 1];
fstar.w[0] = P128.w[0];
}
// if (0 < f* < 10^(-x)) then the result is a midpoint
// since round_to_even, subtract 1 if current result is odd
if ((res & 0x0000000000000001ull) && (fstar.w[1] == 0)
&& (fstar.w[0] < ten2mk64[ind - 1])) {
res--;
}
// determine inexactness of the rounding of C*
// if (0 < f* - 1/2 < 10^(-x)) then
// the result is exact
// else // if (f* - 1/2 > T*) then
// the result is inexact
if (ind - 1 <= 2) {
if (fstar.w[0] > 0x8000000000000000ull) {
// f* > 1/2 and the result may be exact
// fstar.w[0] - 0x8000000000000000ull is f* - 1/2
if ((fstar.w[0] - 0x8000000000000000ull) > ten2mk64[ind - 1]) {
// set the inexact flag
*pfpsf |= INEXACT_EXCEPTION;
} // else the result is exact
} else { // the result is inexact; f2* <= 1/2
// set the inexact flag
*pfpsf |= INEXACT_EXCEPTION;
}
} else { // if 3 <= ind - 1 <= 21
if (fstar.w[1] > onehalf128[ind - 1] ||
(fstar.w[1] == onehalf128[ind - 1] && fstar.w[0])) {
// f2* > 1/2 and the result may be exact
// Calculate f2* - 1/2
if (fstar.w[1] > onehalf128[ind - 1]
|| fstar.w[0] > ten2mk64[ind - 1]) {
// set the inexact flag
*pfpsf |= INEXACT_EXCEPTION;
} // else the result is exact
} else { // the result is inexact; f2* <= 1/2
// set the inexact flag
*pfpsf |= INEXACT_EXCEPTION;
}
}
// set exponent to zero as it was negative before.
res = x_sign | 0x31c0000000000000ull | res;
BID_RETURN (res);
} else { // if exp < 0 and q + exp < 0
// the result is +0 or -0
res = x_sign | 0x31c0000000000000ull;
*pfpsf |= INEXACT_EXCEPTION;
BID_RETURN (res);
}
break;
case ROUNDING_TIES_AWAY:
if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q
// need to shift right -exp digits from the coefficient; exp will be 0
ind = -exp; // 1 <= ind <= 16; ind is a synonym for 'x'
// chop off ind digits from the lower part of C1
// C1 = C1 + 1/2 * 10^x where the result C1 fits in 64 bits
// FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate
C1 = C1 + midpoint64[ind - 1];
// calculate C* and f*
// C* is actually floor(C*) in this case
// C* and f* need shifting and masking, as shown by
// shiftright128[] and maskhigh128[]
// 1 <= x <= 16
// kx = 10^(-x) = ten2mk64[ind - 1]
// C* = (C1 + 1/2 * 10^x) * 10^(-x)
// the approximation of 10^(-x) was rounded up to 64 bits
__mul_64x64_to_128 (P128, C1, ten2mk64[ind - 1]);
// if (0 < f* < 10^(-x)) then the result is a midpoint
// C* = floor(C*) - logical right shift; C* has p decimal digits,
// correct by Prop. 1)
// else
// C* = floor(C*) (logical right shift; C has p decimal digits,
// correct by Property 1)
// n = C* * 10^(e+x)
if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
res = P128.w[1];
fstar.w[1] = 0;
fstar.w[0] = P128.w[0];
} else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
shift = shiftright128[ind - 1]; // 3 <= shift <= 63
res = (P128.w[1] >> shift);
fstar.w[1] = P128.w[1] & maskhigh128[ind - 1];
fstar.w[0] = P128.w[0];
}
// midpoints are already rounded correctly
// determine inexactness of the rounding of C*
// if (0 < f* - 1/2 < 10^(-x)) then
// the result is exact
// else // if (f* - 1/2 > T*) then
// the result is inexact
if (ind - 1 <= 2) {
if (fstar.w[0] > 0x8000000000000000ull) {
// f* > 1/2 and the result may be exact
// fstar.w[0] - 0x8000000000000000ull is f* - 1/2
if ((fstar.w[0] - 0x8000000000000000ull) > ten2mk64[ind - 1]) {
// set the inexact flag
*pfpsf |= INEXACT_EXCEPTION;
} // else the result is exact
} else { // the result is inexact; f2* <= 1/2
// set the inexact flag
*pfpsf |= INEXACT_EXCEPTION;
}
} else { // if 3 <= ind - 1 <= 21
if (fstar.w[1] > onehalf128[ind - 1] ||
(fstar.w[1] == onehalf128[ind - 1] && fstar.w[0])) {
// f2* > 1/2 and the result may be exact
// Calculate f2* - 1/2
if (fstar.w[1] > onehalf128[ind - 1]
|| fstar.w[0] > ten2mk64[ind - 1]) {
// set the inexact flag
*pfpsf |= INEXACT_EXCEPTION;
} // else the result is exact
} else { // the result is inexact; f2* <= 1/2
// set the inexact flag
*pfpsf |= INEXACT_EXCEPTION;
}
}
// set exponent to zero as it was negative before.
res = x_sign | 0x31c0000000000000ull | res;
BID_RETURN (res);
} else { // if exp < 0 and q + exp < 0
// the result is +0 or -0
res = x_sign | 0x31c0000000000000ull;
*pfpsf |= INEXACT_EXCEPTION;
BID_RETURN (res);
}
break;
case ROUNDING_DOWN:
if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q
// need to shift right -exp digits from the coefficient; exp will be 0
ind = -exp; // 1 <= ind <= 16; ind is a synonym for 'x'
// chop off ind digits from the lower part of C1
// C1 fits in 64 bits
// calculate C* and f*
// C* is actually floor(C*) in this case
// C* and f* need shifting and masking, as shown by
// shiftright128[] and maskhigh128[]
// 1 <= x <= 16
// kx = 10^(-x) = ten2mk64[ind - 1]
// C* = C1 * 10^(-x)
// the approximation of 10^(-x) was rounded up to 64 bits
__mul_64x64_to_128 (P128, C1, ten2mk64[ind - 1]);
// C* = floor(C*) (logical right shift; C has p decimal digits,
// correct by Property 1)
// if (0 < f* < 10^(-x)) then the result is exact
// n = C* * 10^(e+x)
if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
res = P128.w[1];
fstar.w[1] = 0;
fstar.w[0] = P128.w[0];
} else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
shift = shiftright128[ind - 1]; // 3 <= shift <= 63
res = (P128.w[1] >> shift);
fstar.w[1] = P128.w[1] & maskhigh128[ind - 1];
fstar.w[0] = P128.w[0];
}
// if (f* > 10^(-x)) then the result is inexact
if ((fstar.w[1] != 0) || (fstar.w[0] >= ten2mk64[ind - 1])) {
if (x_sign) {
// if negative and not exact, increment magnitude
res++;
}
*pfpsf |= INEXACT_EXCEPTION;
}
// set exponent to zero as it was negative before.
res = x_sign | 0x31c0000000000000ull | res;
BID_RETURN (res);
} else { // if exp < 0 and q + exp <= 0
// the result is +0 or -1
if (x_sign) {
res = 0xb1c0000000000001ull;
} else {
res = 0x31c0000000000000ull;
}
*pfpsf |= INEXACT_EXCEPTION;
BID_RETURN (res);
}
break;
case ROUNDING_UP:
if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q
// need to shift right -exp digits from the coefficient; exp will be 0
ind = -exp; // 1 <= ind <= 16; ind is a synonym for 'x'
// chop off ind digits from the lower part of C1
// C1 fits in 64 bits
// calculate C* and f*
// C* is actually floor(C*) in this case
// C* and f* need shifting and masking, as shown by
// shiftright128[] and maskhigh128[]
// 1 <= x <= 16
// kx = 10^(-x) = ten2mk64[ind - 1]
// C* = C1 * 10^(-x)
// the approximation of 10^(-x) was rounded up to 64 bits
__mul_64x64_to_128 (P128, C1, ten2mk64[ind - 1]);
// C* = floor(C*) (logical right shift; C has p decimal digits,
// correct by Property 1)
// if (0 < f* < 10^(-x)) then the result is exact
// n = C* * 10^(e+x)
if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
res = P128.w[1];
fstar.w[1] = 0;
fstar.w[0] = P128.w[0];
} else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
shift = shiftright128[ind - 1]; // 3 <= shift <= 63
res = (P128.w[1] >> shift);
fstar.w[1] = P128.w[1] & maskhigh128[ind - 1];
fstar.w[0] = P128.w[0];
}
// if (f* > 10^(-x)) then the result is inexact
if ((fstar.w[1] != 0) || (fstar.w[0] >= ten2mk64[ind - 1])) {
if (!x_sign) {
// if positive and not exact, increment magnitude
res++;
}
*pfpsf |= INEXACT_EXCEPTION;
}
// set exponent to zero as it was negative before.
res = x_sign | 0x31c0000000000000ull | res;
BID_RETURN (res);
} else { // if exp < 0 and q + exp <= 0
// the result is -0 or +1
if (x_sign) {
res = 0xb1c0000000000000ull;
} else {
res = 0x31c0000000000001ull;
}
*pfpsf |= INEXACT_EXCEPTION;
BID_RETURN (res);
}
break;
case ROUNDING_TO_ZERO:
if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q
// need to shift right -exp digits from the coefficient; exp will be 0
ind = -exp; // 1 <= ind <= 16; ind is a synonym for 'x'
// chop off ind digits from the lower part of C1
// C1 fits in 127 bits
// calculate C* and f*
// C* is actually floor(C*) in this case
// C* and f* need shifting and masking, as shown by
// shiftright128[] and maskhigh128[]
// 1 <= x <= 16
// kx = 10^(-x) = ten2mk64[ind - 1]
// C* = C1 * 10^(-x)
// the approximation of 10^(-x) was rounded up to 64 bits
__mul_64x64_to_128 (P128, C1, ten2mk64[ind - 1]);
// C* = floor(C*) (logical right shift; C has p decimal digits,
// correct by Property 1)
// if (0 < f* < 10^(-x)) then the result is exact
// n = C* * 10^(e+x)
if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
res = P128.w[1];
fstar.w[1] = 0;
fstar.w[0] = P128.w[0];
} else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
shift = shiftright128[ind - 1]; // 3 <= shift <= 63
res = (P128.w[1] >> shift);
fstar.w[1] = P128.w[1] & maskhigh128[ind - 1];
fstar.w[0] = P128.w[0];
}
// if (f* > 10^(-x)) then the result is inexact
if ((fstar.w[1] != 0) || (fstar.w[0] >= ten2mk64[ind - 1])) {
*pfpsf |= INEXACT_EXCEPTION;
}
// set exponent to zero as it was negative before.
res = x_sign | 0x31c0000000000000ull | res;
BID_RETURN (res);
} else { // if exp < 0 and q + exp < 0
// the result is +0 or -0
res = x_sign | 0x31c0000000000000ull;
*pfpsf |= INEXACT_EXCEPTION;
BID_RETURN (res);
}
break;
} // end switch ()
BID_RETURN (res);
}
/*****************************************************************************
* BID64_round_integral_nearest_even
****************************************************************************/
#if DECIMAL_CALL_BY_REFERENCE
void
bid64_round_integral_nearest_even (UINT64 * pres,
UINT64 *
px _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
_EXC_INFO_PARAM) {
UINT64 x = *px;
#else
UINT64
bid64_round_integral_nearest_even (UINT64 x _EXC_FLAGS_PARAM
_EXC_MASKS_PARAM _EXC_INFO_PARAM) {
#endif
UINT64 res = 0xbaddbaddbaddbaddull;
UINT64 x_sign;
int exp; // unbiased exponent
// Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64)
BID_UI64DOUBLE tmp1;
int x_nr_bits;
int q, ind, shift;
UINT64 C1;
UINT128 fstar;
UINT128 P128;
x_sign = x & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
// check for NaNs and infinities
if ((x & MASK_NAN) == MASK_NAN) { // check for NaN
if ((x & 0x0003ffffffffffffull) > 999999999999999ull)
x = x & 0xfe00000000000000ull; // clear G6-G12 and the payload bits
else
x = x & 0xfe03ffffffffffffull; // clear G6-G12
if ((x & MASK_SNAN) == MASK_SNAN) { // SNaN
// set invalid flag
*pfpsf |= INVALID_EXCEPTION;
// return quiet (SNaN)
res = x & 0xfdffffffffffffffull;
} else { // QNaN
res = x;
}
BID_RETURN (res);
} else if ((x & MASK_INF) == MASK_INF) { // check for Infinity
res = x_sign | 0x7800000000000000ull;
BID_RETURN (res);
}
// unpack x
if ((x & MASK_STEERING_BITS) == MASK_STEERING_BITS) {
// if the steering bits are 11 (condition will be 0), then
// the exponent is G[0:w+1]
exp = ((x & MASK_BINARY_EXPONENT2) >> 51) - 398;
C1 = (x & MASK_BINARY_SIG2) | MASK_BINARY_OR2;
if (C1 > 9999999999999999ull) { // non-canonical
C1 = 0;
}
} else { // if ((x & MASK_STEERING_BITS) != MASK_STEERING_BITS)
exp = ((x & MASK_BINARY_EXPONENT1) >> 53) - 398;
C1 = (x & MASK_BINARY_SIG1);
}
// if x is 0 or non-canonical
if (C1 == 0) {
if (exp < 0)
exp = 0;
res = x_sign | (((UINT64) exp + 398) << 53);
BID_RETURN (res);
}
// x is a finite non-zero number (not 0, non-canonical, or special)
// return 0 if (exp <= -(p+1))
if (exp <= -17) {
res = x_sign | 0x31c0000000000000ull;
BID_RETURN (res);
}
// q = nr. of decimal digits in x (1 <= q <= 54)
// determine first the nr. of bits in x
if (C1 >= 0x0020000000000000ull) { // x >= 2^53
q = 16;
} else { // if x < 2^53
tmp1.d = (double) C1; // exact conversion
x_nr_bits =
1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
q = nr_digits[x_nr_bits - 1].digits;
if (q == 0) {
q = nr_digits[x_nr_bits - 1].digits1;
if (C1 >= nr_digits[x_nr_bits - 1].threshold_lo)
q++;
}
}
if (exp >= 0) { // -exp <= 0
// the argument is an integer already
res = x;
BID_RETURN (res);
} else if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q
// need to shift right -exp digits from the coefficient; the exp will be 0
ind = -exp; // 1 <= ind <= 16; ind is a synonym for 'x'
// chop off ind digits from the lower part of C1
// C1 = C1 + 1/2 * 10^x where the result C1 fits in 64 bits
// FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate
C1 = C1 + midpoint64[ind - 1];
// calculate C* and f*
// C* is actually floor(C*) in this case
// C* and f* need shifting and masking, as shown by
// shiftright128[] and maskhigh128[]
// 1 <= x <= 16
// kx = 10^(-x) = ten2mk64[ind - 1]
// C* = (C1 + 1/2 * 10^x) * 10^(-x)
// the approximation of 10^(-x) was rounded up to 64 bits
__mul_64x64_to_128 (P128, C1, ten2mk64[ind - 1]);
// if (0 < f* < 10^(-x)) then the result is a midpoint
// if floor(C*) is even then C* = floor(C*) - logical right
// shift; C* has p decimal digits, correct by Prop. 1)
// else if floor(C*) is odd C* = floor(C*)-1 (logical right
// shift; C* has p decimal digits, correct by Pr. 1)
// else
// C* = floor(C*) (logical right shift; C has p decimal digits,
// correct by Property 1)
// n = C* * 10^(e+x)
if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
res = P128.w[1];
fstar.w[1] = 0;
fstar.w[0] = P128.w[0];
} else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
shift = shiftright128[ind - 1]; // 3 <= shift <= 63
res = (P128.w[1] >> shift);
fstar.w[1] = P128.w[1] & maskhigh128[ind - 1];
fstar.w[0] = P128.w[0];
}
// if (0 < f* < 10^(-x)) then the result is a midpoint
// since round_to_even, subtract 1 if current result is odd
if ((res & 0x0000000000000001ull) && (fstar.w[1] == 0)
&& (fstar.w[0] < ten2mk64[ind - 1])) {
res--;
}
// set exponent to zero as it was negative before.
res = x_sign | 0x31c0000000000000ull | res;
BID_RETURN (res);
} else { // if exp < 0 and q + exp < 0
// the result is +0 or -0
res = x_sign | 0x31c0000000000000ull;
BID_RETURN (res);
}
}
/*****************************************************************************
* BID64_round_integral_negative
*****************************************************************************/
#if DECIMAL_CALL_BY_REFERENCE
void
bid64_round_integral_negative (UINT64 * pres,
UINT64 *
px _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
_EXC_INFO_PARAM) {
UINT64 x = *px;
#else
UINT64
bid64_round_integral_negative (UINT64 x _EXC_FLAGS_PARAM
_EXC_MASKS_PARAM _EXC_INFO_PARAM) {
#endif
UINT64 res = 0xbaddbaddbaddbaddull;
UINT64 x_sign;
int exp; // unbiased exponent
// Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64)
BID_UI64DOUBLE tmp1;
int x_nr_bits;
int q, ind, shift;
UINT64 C1;
// UINT64 res is C* at first - represents up to 34 decimal digits ~ 113 bits
UINT128 fstar;
UINT128 P128;
x_sign = x & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
// check for NaNs and infinities
if ((x & MASK_NAN) == MASK_NAN) { // check for NaN
if ((x & 0x0003ffffffffffffull) > 999999999999999ull)
x = x & 0xfe00000000000000ull; // clear G6-G12 and the payload bits
else
x = x & 0xfe03ffffffffffffull; // clear G6-G12
if ((x & MASK_SNAN) == MASK_SNAN) { // SNaN
// set invalid flag
*pfpsf |= INVALID_EXCEPTION;
// return quiet (SNaN)
res = x & 0xfdffffffffffffffull;
} else { // QNaN
res = x;
}
BID_RETURN (res);
} else if ((x & MASK_INF) == MASK_INF) { // check for Infinity
res = x_sign | 0x7800000000000000ull;
BID_RETURN (res);
}
// unpack x
if ((x & MASK_STEERING_BITS) == MASK_STEERING_BITS) {
// if the steering bits are 11 (condition will be 0), then
// the exponent is G[0:w+1]
exp = ((x & MASK_BINARY_EXPONENT2) >> 51) - 398;
C1 = (x & MASK_BINARY_SIG2) | MASK_BINARY_OR2;
if (C1 > 9999999999999999ull) { // non-canonical
C1 = 0;
}
} else { // if ((x & MASK_STEERING_BITS) != MASK_STEERING_BITS)
exp = ((x & MASK_BINARY_EXPONENT1) >> 53) - 398;
C1 = (x & MASK_BINARY_SIG1);
}
// if x is 0 or non-canonical
if (C1 == 0) {
if (exp < 0)
exp = 0;
res = x_sign | (((UINT64) exp + 398) << 53);
BID_RETURN (res);
}
// x is a finite non-zero number (not 0, non-canonical, or special)
// return 0 if (exp <= -p)
if (exp <= -16) {
if (x_sign) {
res = 0xb1c0000000000001ull;
} else {
res = 0x31c0000000000000ull;
}
BID_RETURN (res);
}
// q = nr. of decimal digits in x (1 <= q <= 54)
// determine first the nr. of bits in x
if (C1 >= 0x0020000000000000ull) { // x >= 2^53
q = 16;
} else { // if x < 2^53
tmp1.d = (double) C1; // exact conversion
x_nr_bits =
1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
q = nr_digits[x_nr_bits - 1].digits;
if (q == 0) {
q = nr_digits[x_nr_bits - 1].digits1;
if (C1 >= nr_digits[x_nr_bits - 1].threshold_lo)
q++;
}
}
if (exp >= 0) { // -exp <= 0
// the argument is an integer already
res = x;
BID_RETURN (res);
} else if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q
// need to shift right -exp digits from the coefficient; the exp will be 0
ind = -exp; // 1 <= ind <= 16; ind is a synonym for 'x'
// chop off ind digits from the lower part of C1
// C1 fits in 64 bits
// calculate C* and f*
// C* is actually floor(C*) in this case
// C* and f* need shifting and masking, as shown by
// shiftright128[] and maskhigh128[]
// 1 <= x <= 16
// kx = 10^(-x) = ten2mk64[ind - 1]
// C* = C1 * 10^(-x)
// the approximation of 10^(-x) was rounded up to 64 bits
__mul_64x64_to_128 (P128, C1, ten2mk64[ind - 1]);
// C* = floor(C*) (logical right shift; C has p decimal digits,
// correct by Property 1)
// if (0 < f* < 10^(-x)) then the result is exact
// n = C* * 10^(e+x)
if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
res = P128.w[1];
fstar.w[1] = 0;
fstar.w[0] = P128.w[0];
} else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
shift = shiftright128[ind - 1]; // 3 <= shift <= 63
res = (P128.w[1] >> shift);
fstar.w[1] = P128.w[1] & maskhigh128[ind - 1];
fstar.w[0] = P128.w[0];
}
// if (f* > 10^(-x)) then the result is inexact
if (x_sign
&& ((fstar.w[1] != 0) || (fstar.w[0] >= ten2mk64[ind - 1]))) {
// if negative and not exact, increment magnitude
res++;
}
// set exponent to zero as it was negative before.
res = x_sign | 0x31c0000000000000ull | res;
BID_RETURN (res);
} else { // if exp < 0 and q + exp <= 0
// the result is +0 or -1
if (x_sign) {
res = 0xb1c0000000000001ull;
} else {
res = 0x31c0000000000000ull;
}
BID_RETURN (res);
}
}
/*****************************************************************************
* BID64_round_integral_positive
****************************************************************************/
#if DECIMAL_CALL_BY_REFERENCE
void
bid64_round_integral_positive (UINT64 * pres,
UINT64 *
px _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
_EXC_INFO_PARAM) {
UINT64 x = *px;
#else
UINT64
bid64_round_integral_positive (UINT64 x _EXC_FLAGS_PARAM
_EXC_MASKS_PARAM _EXC_INFO_PARAM) {
#endif
UINT64 res = 0xbaddbaddbaddbaddull;
UINT64 x_sign;
int exp; // unbiased exponent
// Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64)
BID_UI64DOUBLE tmp1;
int x_nr_bits;
int q, ind, shift;
UINT64 C1;
// UINT64 res is C* at first - represents up to 34 decimal digits ~ 113 bits
UINT128 fstar;
UINT128 P128;
x_sign = x & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
// check for NaNs and infinities
if ((x & MASK_NAN) == MASK_NAN) { // check for NaN
if ((x & 0x0003ffffffffffffull) > 999999999999999ull)
x = x & 0xfe00000000000000ull; // clear G6-G12 and the payload bits
else
x = x & 0xfe03ffffffffffffull; // clear G6-G12
if ((x & MASK_SNAN) == MASK_SNAN) { // SNaN
// set invalid flag
*pfpsf |= INVALID_EXCEPTION;
// return quiet (SNaN)
res = x & 0xfdffffffffffffffull;
} else { // QNaN
res = x;
}
BID_RETURN (res);
} else if ((x & MASK_INF) == MASK_INF) { // check for Infinity
res = x_sign | 0x7800000000000000ull;
BID_RETURN (res);
}
// unpack x
if ((x & MASK_STEERING_BITS) == MASK_STEERING_BITS) {
// if the steering bits are 11 (condition will be 0), then
// the exponent is G[0:w+1]
exp = ((x & MASK_BINARY_EXPONENT2) >> 51) - 398;
C1 = (x & MASK_BINARY_SIG2) | MASK_BINARY_OR2;
if (C1 > 9999999999999999ull) { // non-canonical
C1 = 0;
}
} else { // if ((x & MASK_STEERING_BITS) != MASK_STEERING_BITS)
exp = ((x & MASK_BINARY_EXPONENT1) >> 53) - 398;
C1 = (x & MASK_BINARY_SIG1);
}
// if x is 0 or non-canonical
if (C1 == 0) {
if (exp < 0)
exp = 0;
res = x_sign | (((UINT64) exp + 398) << 53);
BID_RETURN (res);
}
// x is a finite non-zero number (not 0, non-canonical, or special)
// return 0 if (exp <= -p)
if (exp <= -16) {
if (x_sign) {
res = 0xb1c0000000000000ull;
} else {
res = 0x31c0000000000001ull;
}
BID_RETURN (res);
}
// q = nr. of decimal digits in x (1 <= q <= 54)
// determine first the nr. of bits in x
if (C1 >= 0x0020000000000000ull) { // x >= 2^53
q = 16;
} else { // if x < 2^53
tmp1.d = (double) C1; // exact conversion
x_nr_bits =
1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
q = nr_digits[x_nr_bits - 1].digits;
if (q == 0) {
q = nr_digits[x_nr_bits - 1].digits1;
if (C1 >= nr_digits[x_nr_bits - 1].threshold_lo)
q++;
}
}
if (exp >= 0) { // -exp <= 0
// the argument is an integer already
res = x;
BID_RETURN (res);
} else if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q
// need to shift right -exp digits from the coefficient; the exp will be 0
ind = -exp; // 1 <= ind <= 16; ind is a synonym for 'x'
// chop off ind digits from the lower part of C1
// C1 fits in 64 bits
// calculate C* and f*
// C* is actually floor(C*) in this case
// C* and f* need shifting and masking, as shown by
// shiftright128[] and maskhigh128[]
// 1 <= x <= 16
// kx = 10^(-x) = ten2mk64[ind - 1]
// C* = C1 * 10^(-x)
// the approximation of 10^(-x) was rounded up to 64 bits
__mul_64x64_to_128 (P128, C1, ten2mk64[ind - 1]);
// C* = floor(C*) (logical right shift; C has p decimal digits,
// correct by Property 1)
// if (0 < f* < 10^(-x)) then the result is exact
// n = C* * 10^(e+x)
if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
res = P128.w[1];
fstar.w[1] = 0;
fstar.w[0] = P128.w[0];
} else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
shift = shiftright128[ind - 1]; // 3 <= shift <= 63
res = (P128.w[1] >> shift);
fstar.w[1] = P128.w[1] & maskhigh128[ind - 1];
fstar.w[0] = P128.w[0];
}
// if (f* > 10^(-x)) then the result is inexact
if (!x_sign
&& ((fstar.w[1] != 0) || (fstar.w[0] >= ten2mk64[ind - 1]))) {
// if positive and not exact, increment magnitude
res++;
}
// set exponent to zero as it was negative before.
res = x_sign | 0x31c0000000000000ull | res;
BID_RETURN (res);
} else { // if exp < 0 and q + exp <= 0
// the result is -0 or +1
if (x_sign) {
res = 0xb1c0000000000000ull;
} else {
res = 0x31c0000000000001ull;
}
BID_RETURN (res);
}
}
/*****************************************************************************
* BID64_round_integral_zero
****************************************************************************/
#if DECIMAL_CALL_BY_REFERENCE
void
bid64_round_integral_zero (UINT64 * pres,
UINT64 *
px _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
_EXC_INFO_PARAM) {
UINT64 x = *px;
#else
UINT64
bid64_round_integral_zero (UINT64 x _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
_EXC_INFO_PARAM) {
#endif
UINT64 res = 0xbaddbaddbaddbaddull;
UINT64 x_sign;
int exp; // unbiased exponent
// Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64)
BID_UI64DOUBLE tmp1;
int x_nr_bits;
int q, ind, shift;
UINT64 C1;
// UINT64 res is C* at first - represents up to 34 decimal digits ~ 113 bits
UINT128 P128;
x_sign = x & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
// check for NaNs and infinities
if ((x & MASK_NAN) == MASK_NAN) { // check for NaN
if ((x & 0x0003ffffffffffffull) > 999999999999999ull)
x = x & 0xfe00000000000000ull; // clear G6-G12 and the payload bits
else
x = x & 0xfe03ffffffffffffull; // clear G6-G12
if ((x & MASK_SNAN) == MASK_SNAN) { // SNaN
// set invalid flag
*pfpsf |= INVALID_EXCEPTION;
// return quiet (SNaN)
res = x & 0xfdffffffffffffffull;
} else { // QNaN
res = x;
}
BID_RETURN (res);
} else if ((x & MASK_INF) == MASK_INF) { // check for Infinity
res = x_sign | 0x7800000000000000ull;
BID_RETURN (res);
}
// unpack x
if ((x & MASK_STEERING_BITS) == MASK_STEERING_BITS) {
// if the steering bits are 11 (condition will be 0), then
// the exponent is G[0:w+1]
exp = ((x & MASK_BINARY_EXPONENT2) >> 51) - 398;
C1 = (x & MASK_BINARY_SIG2) | MASK_BINARY_OR2;
if (C1 > 9999999999999999ull) { // non-canonical
C1 = 0;
}
} else { // if ((x & MASK_STEERING_BITS) != MASK_STEERING_BITS)
exp = ((x & MASK_BINARY_EXPONENT1) >> 53) - 398;
C1 = (x & MASK_BINARY_SIG1);
}
// if x is 0 or non-canonical
if (C1 == 0) {
if (exp < 0)
exp = 0;
res = x_sign | (((UINT64) exp + 398) << 53);
BID_RETURN (res);
}
// x is a finite non-zero number (not 0, non-canonical, or special)
// return 0 if (exp <= -p)
if (exp <= -16) {
res = x_sign | 0x31c0000000000000ull;
BID_RETURN (res);
}
// q = nr. of decimal digits in x (1 <= q <= 54)
// determine first the nr. of bits in x
if (C1 >= 0x0020000000000000ull) { // x >= 2^53
q = 16;
} else { // if x < 2^53
tmp1.d = (double) C1; // exact conversion
x_nr_bits =
1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
q = nr_digits[x_nr_bits - 1].digits;
if (q == 0) {
q = nr_digits[x_nr_bits - 1].digits1;
if (C1 >= nr_digits[x_nr_bits - 1].threshold_lo)
q++;
}
}
if (exp >= 0) { // -exp <= 0
// the argument is an integer already
res = x;
BID_RETURN (res);
} else if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q
// need to shift right -exp digits from the coefficient; the exp will be 0
ind = -exp; // 1 <= ind <= 16; ind is a synonym for 'x'
// chop off ind digits from the lower part of C1
// C1 fits in 127 bits
// calculate C* and f*
// C* is actually floor(C*) in this case
// C* and f* need shifting and masking, as shown by
// shiftright128[] and maskhigh128[]
// 1 <= x <= 16
// kx = 10^(-x) = ten2mk64[ind - 1]
// C* = C1 * 10^(-x)
// the approximation of 10^(-x) was rounded up to 64 bits
__mul_64x64_to_128 (P128, C1, ten2mk64[ind - 1]);
// C* = floor(C*) (logical right shift; C has p decimal digits,
// correct by Property 1)
// if (0 < f* < 10^(-x)) then the result is exact
// n = C* * 10^(e+x)
if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
res = P128.w[1];
// redundant fstar.w[1] = 0;
// redundant fstar.w[0] = P128.w[0];
} else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
shift = shiftright128[ind - 1]; // 3 <= shift <= 63
res = (P128.w[1] >> shift);
// redundant fstar.w[1] = P128.w[1] & maskhigh128[ind - 1];
// redundant fstar.w[0] = P128.w[0];
}
// if (f* > 10^(-x)) then the result is inexact
// if ((fstar.w[1] != 0) || (fstar.w[0] >= ten2mk64[ind-1])){
// // redundant
// }
// set exponent to zero as it was negative before.
res = x_sign | 0x31c0000000000000ull | res;
BID_RETURN (res);
} else { // if exp < 0 and q + exp < 0
// the result is +0 or -0
res = x_sign | 0x31c0000000000000ull;
BID_RETURN (res);
}
}
/*****************************************************************************
* BID64_round_integral_nearest_away
****************************************************************************/
#if DECIMAL_CALL_BY_REFERENCE
void
bid64_round_integral_nearest_away (UINT64 * pres,
UINT64 *
px _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
_EXC_INFO_PARAM) {
UINT64 x = *px;
#else
UINT64
bid64_round_integral_nearest_away (UINT64 x _EXC_FLAGS_PARAM
_EXC_MASKS_PARAM _EXC_INFO_PARAM) {
#endif
UINT64 res = 0xbaddbaddbaddbaddull;
UINT64 x_sign;
int exp; // unbiased exponent
// Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64)
BID_UI64DOUBLE tmp1;
int x_nr_bits;
int q, ind, shift;
UINT64 C1;
UINT128 P128;
x_sign = x & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
// check for NaNs and infinities
if ((x & MASK_NAN) == MASK_NAN) { // check for NaN
if ((x & 0x0003ffffffffffffull) > 999999999999999ull)
x = x & 0xfe00000000000000ull; // clear G6-G12 and the payload bits
else
x = x & 0xfe03ffffffffffffull; // clear G6-G12
if ((x & MASK_SNAN) == MASK_SNAN) { // SNaN
// set invalid flag
*pfpsf |= INVALID_EXCEPTION;
// return quiet (SNaN)
res = x & 0xfdffffffffffffffull;
} else { // QNaN
res = x;
}
BID_RETURN (res);
} else if ((x & MASK_INF) == MASK_INF) { // check for Infinity
res = x_sign | 0x7800000000000000ull;
BID_RETURN (res);
}
// unpack x
if ((x & MASK_STEERING_BITS) == MASK_STEERING_BITS) {
// if the steering bits are 11 (condition will be 0), then
// the exponent is G[0:w+1]
exp = ((x & MASK_BINARY_EXPONENT2) >> 51) - 398;
C1 = (x & MASK_BINARY_SIG2) | MASK_BINARY_OR2;
if (C1 > 9999999999999999ull) { // non-canonical
C1 = 0;
}
} else { // if ((x & MASK_STEERING_BITS) != MASK_STEERING_BITS)
exp = ((x & MASK_BINARY_EXPONENT1) >> 53) - 398;
C1 = (x & MASK_BINARY_SIG1);
}
// if x is 0 or non-canonical
if (C1 == 0) {
if (exp < 0)
exp = 0;
res = x_sign | (((UINT64) exp + 398) << 53);
BID_RETURN (res);
}
// x is a finite non-zero number (not 0, non-canonical, or special)
// return 0 if (exp <= -(p+1))
if (exp <= -17) {
res = x_sign | 0x31c0000000000000ull;
BID_RETURN (res);
}
// q = nr. of decimal digits in x (1 <= q <= 54)
// determine first the nr. of bits in x
if (C1 >= 0x0020000000000000ull) { // x >= 2^53
q = 16;
} else { // if x < 2^53
tmp1.d = (double) C1; // exact conversion
x_nr_bits =
1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
q = nr_digits[x_nr_bits - 1].digits;
if (q == 0) {
q = nr_digits[x_nr_bits - 1].digits1;
if (C1 >= nr_digits[x_nr_bits - 1].threshold_lo)
q++;
}
}
if (exp >= 0) { // -exp <= 0
// the argument is an integer already
res = x;
BID_RETURN (res);
} else if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q
// need to shift right -exp digits from the coefficient; the exp will be 0
ind = -exp; // 1 <= ind <= 16; ind is a synonym for 'x'
// chop off ind digits from the lower part of C1
// C1 = C1 + 1/2 * 10^x where the result C1 fits in 64 bits
// FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate
C1 = C1 + midpoint64[ind - 1];
// calculate C* and f*
// C* is actually floor(C*) in this case
// C* and f* need shifting and masking, as shown by
// shiftright128[] and maskhigh128[]
// 1 <= x <= 16
// kx = 10^(-x) = ten2mk64[ind - 1]
// C* = (C1 + 1/2 * 10^x) * 10^(-x)
// the approximation of 10^(-x) was rounded up to 64 bits
__mul_64x64_to_128 (P128, C1, ten2mk64[ind - 1]);
// if (0 < f* < 10^(-x)) then the result is a midpoint
// C* = floor(C*) - logical right shift; C* has p decimal digits,
// correct by Prop. 1)
// else
// C* = floor(C*) (logical right shift; C has p decimal digits,
// correct by Property 1)
// n = C* * 10^(e+x)
if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
res = P128.w[1];
} else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
shift = shiftright128[ind - 1]; // 3 <= shift <= 63
res = (P128.w[1] >> shift);
}
// midpoints are already rounded correctly
// set exponent to zero as it was negative before.
res = x_sign | 0x31c0000000000000ull | res;
BID_RETURN (res);
} else { // if exp < 0 and q + exp < 0
// the result is +0 or -0
res = x_sign | 0x31c0000000000000ull;
BID_RETURN (res);
}
}