/* gcc-4.3.1/libgcc/config/libbid/bid_inline_add.h - toolchain/gcc - Git at Google */

 /* Copyright (C) 2007  Free Software Foundation, Inc.

 This file is part of GCC.

 GCC is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free
 Software Foundation; either version 2, or (at your option) any later
 version.

 In addition to the permissions in the GNU General Public License, the
 Free Software Foundation gives you unlimited permission to link the
 compiled version of this file into combinations with other programs,
 and to distribute those combinations without any restriction coming
 from the use of this file.  (The General Public License restrictions
 do apply in other respects; for example, they cover modification of
 the file, and distribution when not linked into a combine
 executable.)

 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
 WARRANTY; without even the implied warranty of MERCHANTABILITY or
 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 for more details.

 You should have received a copy of the GNU General Public License
 along with GCC; see the file COPYING.  If not, write to the Free
 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
 02110-1301, USA.  */

 /*****************************************************************************
  *
  *    Helper add functions (for fma)
  *
  *    __BID_INLINE__ UINT64 get_add64(
  *        UINT64 sign_x, int exponent_x, UINT64 coefficient_x,
  *        UINT64 sign_y, int exponent_y, UINT64 coefficient_y,
  *        int rounding_mode, unsigned *fpsc)
  *
  *    __BID_INLINE__ UINT64 get_add128(
  *        UINT64 sign_x, int exponent_x, UINT64 coefficient_x,
  *        UINT64 sign_y, int final_exponent_y, UINT128 CY,
  *        int extra_digits, int rounding_mode, unsigned *fpsc)
  *
  *****************************************************************************
  *
  *  Algorithm description:
  *
  *  get_add64:  same as BID64 add, but arguments are unpacked and there
  *                                 are no special case checks
  *
  *  get_add128: add 64-bit coefficient to 128-bit product (which contains
  *                                        16+extra_digits decimal digits),
  *                         return BID64 result
  *              - the exponents are compared and the two coefficients are
  *                properly aligned for addition/subtraction
  *              - multiple paths are needed
  *              - final result exponent is calculated and the lower term is
  *                      rounded first if necessary, to avoid manipulating
  *                      coefficients longer than 128 bits
  *
  ****************************************************************************/

 #ifndef _INLINE_BID_ADD_H_
 #define _INLINE_BID_ADD_H_

 #include "bid_internal.h"

 // number of decimal digits in a normalized BID64 coefficient
 #define MAX_FORMAT_DIGITS     16
 // presumably the BID64 decimal exponent bias -- not referenced in the
 // visible part of this file, TODO confirm against bid_internal.h
 #define DECIMAL_EXPONENT_BIAS 398
 // selects the exponent field (bits 52..62) of a double's bit pattern
 #define MASK_BINARY_EXPONENT  0x7ff0000000000000ull
 // bias of that exponent field
 // NOTE(review): the functions in this file spell the literal 0x3ff
 // instead of using this macro
 #define BINARY_EXPONENT_BIAS  0x3ff
 // NOTE(review): not referenced in the visible part of this file;
 // meaning to be confirmed
 #define UPPER_EXPON_LIMIT     51

 ///////////////////////////////////////////////////////////////////////
 //
 // get_add64() adds two operands that the caller has already unpacked
 //             into (sign, exponent, coefficient) triples and returns
 //             the sum packed by get_BID64().  It is essentially the
 //             same as bid_add(), except that the arguments are unpacked.
 //
 //   sign_x, sign_y:   operand signs; the code sign-extends them with
 //                     ((SINT64) s) >> 63, so the sign is taken from
 //                     bit 63
 //   exponent_x/_y:    decimal exponents of the operands
 //   coefficient_x/_y: operand coefficients
 //   rounding_mode:    rounding mode; also forwarded to get_BID64()
 //   fpsc:             status word handed to __set_status_flags() (when
 //                     SET_STATUS_FLAGS is defined) and to get_BID64()
 //
 //////////////////////////////////////////////////////////////////////
 __BID_INLINE__ UINT64
 get_add64 (UINT64 sign_x, int exponent_x, UINT64 coefficient_x,
 	   UINT64 sign_y, int exponent_y, UINT64 coefficient_y,
 	   int rounding_mode, unsigned *fpsc) {
   UINT128 CA, CT, CT_new;
   UINT64 sign_a, sign_b, coefficient_a, coefficient_b, sign_s, sign_ab,
     rem_a;
   UINT64 saved_ca, saved_cb, C0_64, C64, remainder_h, T1, carry, tmp,
     C64_new;
   int_double tempx;
   int exponent_a, exponent_b, diff_dec_expon;
   int bin_expon_ca, extra_digits, amount, scale_k, scale_ca;
   unsigned rmode, status;

   // sort arguments by exponent: a gets the larger exponent
   // (on a tie a = y, b = x)
   if (exponent_x <= exponent_y) {
     sign_a = sign_y;
     exponent_a = exponent_y;
     coefficient_a = coefficient_y;
     sign_b = sign_x;
     exponent_b = exponent_x;
     coefficient_b = coefficient_x;
   } else {
     sign_a = sign_x;
     exponent_a = exponent_x;
     coefficient_a = coefficient_x;
     sign_b = sign_y;
     exponent_b = exponent_y;
     coefficient_b = coefficient_y;
   }

   // exponent difference (>= 0 after the sort above)
   diff_dec_expon = exponent_a - exponent_b;

   /* get binary coefficients of x and y */

   //--- get number of bits in the coefficients of x and y ---

   // unbiased binary exponent of (double) coefficient_a, read from the
   // exponent field of the double's bit pattern
   tempx.d = (double) coefficient_a;
   bin_expon_ca = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff;

   // a == 0: the sum is just b (bin_expon_ca is not used on this path)
   if (!coefficient_a) {
     return get_BID64 (sign_b, exponent_b, coefficient_b, rounding_mode,
 		      fpsc);
   }
   if (diff_dec_expon > MAX_FORMAT_DIGITS) {
     // normalize a to a 16-digit coefficient

     // decimal digit count of coefficient_a: table estimate, corrected
     // by one comparison against the matching power of 10
     scale_ca = estimate_decimal_digits[bin_expon_ca];
     if (coefficient_a >= power10_table_128[scale_ca].w[0])
       scale_ca++;

     scale_k = 16 - scale_ca;

     coefficient_a *= power10_table_128[scale_k].w[0];

     diff_dec_expon -= scale_k;
     exponent_a -= scale_k;

     /* get binary coefficients of x and y */

     //--- get number of bits in the coefficients of x and y ---
     tempx.d = (double) coefficient_a;
     bin_expon_ca = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff;

     // even after normalization b lies entirely below the 16 digits of a:
     // b can only influence the result through rounding
     if (diff_dec_expon > MAX_FORMAT_DIGITS) {
 #ifdef SET_STATUS_FLAGS
       if (coefficient_b) {
 	__set_status_flags (fpsc, INEXACT_EXCEPTION);
       }
 #endif

 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
 #ifndef IEEE_ROUND_NEAREST
       if (((rounding_mode) & 3) && coefficient_b)	// not ROUNDING_TO_NEAREST
       {
 	// ((SINT64) sign_a >> 63) | 1 evaluates to -1 when bit 63 of
 	// sign_a is set (arithmetic shift) and to +1 otherwise.
 	// After adjusting by one unit, the coefficient is brought back
 	// into the 16-digit range [10^15, 10^16) by changing the exponent.
 	switch (rounding_mode) {
 	case ROUNDING_DOWN:
 	  if (sign_b) {
 	    coefficient_a -= ((((SINT64) sign_a) >> 63) | 1);
 	    if (coefficient_a < 1000000000000000ull) {
 	      exponent_a--;
 	      coefficient_a = 9999999999999999ull;
 	    } else if (coefficient_a >= 10000000000000000ull) {
 	      exponent_a++;
 	      coefficient_a = 1000000000000000ull;
 	    }
 	  }
 	  break;
 	case ROUNDING_UP:
 	  if (!sign_b) {
 	    coefficient_a += ((((SINT64) sign_a) >> 63) | 1);
 	    if (coefficient_a < 1000000000000000ull) {
 	      exponent_a--;
 	      coefficient_a = 9999999999999999ull;
 	    } else if (coefficient_a >= 10000000000000000ull) {
 	      exponent_a++;
 	      coefficient_a = 1000000000000000ull;
 	    }
 	  }
 	  break;
 	default:	// RZ
 	  // opposite signs: the magnitude shrinks, so truncation drops
 	  // one unit
 	  if (sign_a != sign_b) {
 	    coefficient_a--;
 	    if (coefficient_a < 1000000000000000ull) {
 	      exponent_a--;
 	      coefficient_a = 9999999999999999ull;
 	    }
 	  }
 	  break;
 	}
       } else
 #endif
 #endif
 	// check special case here
 	// (a is exactly 10^15, b has the opposite sign, sits 17 decimal
 	// places lower and exceeds 5*10^15: the result becomes
 	// 9999999999999999 with the exponent lowered by one)
 	if ((coefficient_a == 1000000000000000ull)
 	    && (diff_dec_expon == MAX_FORMAT_DIGITS + 1)
 	    && (sign_a ^ sign_b)
 	    && (coefficient_b > 5000000000000000ull)) {
 	coefficient_a = 9999999999999999ull;
 	exponent_a--;
       }

       return get_BID64 (sign_a, exponent_a, coefficient_a,
 			rounding_mode, fpsc);
     }
   }
   // test whether coefficient_a*10^(exponent_a-exponent_b)  may exceed 2^62
   if (bin_expon_ca + estimate_bin_expon[diff_dec_expon] < 60) {
     // coefficient_a*10^(exponent_a-exponent_b)<2^63

     // multiply by 10^(exponent_a-exponent_b)
     coefficient_a *= power10_table_128[diff_dec_expon].w[0];

     // sign mask (all ones when bit 63 is set, else zero)
     sign_b = ((SINT64) sign_b) >> 63;
     // apply sign to coeff. of b: (c + mask) ^ mask negates c in two's
     // complement when mask is all ones and leaves it unchanged otherwise
     coefficient_b = (coefficient_b + sign_b) ^ sign_b;

     // apply sign to coefficient a
     sign_a = ((SINT64) sign_a) >> 63;
     coefficient_a = (coefficient_a + sign_a) ^ sign_a;

     // signed sum, then split back into sign and magnitude
     coefficient_a += coefficient_b;
     // get sign
     sign_s = ((SINT64) coefficient_a) >> 63;
     coefficient_a = (coefficient_a + sign_s) ^ sign_s;
     sign_s &= 0x8000000000000000ull;

     // coefficient_a < 10^16 ?
     if (coefficient_a < power10_table_128[MAX_FORMAT_DIGITS].w[0]) {
 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
 #ifndef IEEE_ROUND_NEAREST
       // zero sum of operands with opposite signs is negative when
       // rounding down (sign_a / sign_b are the sign masks here)
       if (rounding_mode == ROUNDING_DOWN && (!coefficient_a)
 	  && sign_a != sign_b)
 	sign_s = 0x8000000000000000ull;
 #endif
 #endif
       return get_BID64 (sign_s, exponent_b, coefficient_a,
 			rounding_mode, fpsc);
     }
     // otherwise rounding is necessary

     // already know coefficient_a<10^19
     // coefficient_a < 10^17 ?
     if (coefficient_a < power10_table_128[17].w[0])
       extra_digits = 1;
     else if (coefficient_a < power10_table_128[18].w[0])
       extra_digits = 2;
     else
       extra_digits = 3;

 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
 #ifndef IEEE_ROUND_NEAREST
     rmode = rounding_mode;
     // negative result: swap modes 1 and 2 (presumably ROUNDING_DOWN and
     // ROUNDING_UP) so that rounding can be applied to the magnitude
     if (sign_s && (unsigned) (rmode - 1) < 2)
       rmode = 3 - rmode;
 #else
     rmode = 0;
 #endif
 #else
     rmode = 0;
 #endif
     coefficient_a += round_const_table[rmode][extra_digits];

     // get P*(2^M[extra_digits])/10^extra_digits
     __mul_64x64_to_128 (CT, coefficient_a,
 			reciprocals10_64[extra_digits]);

     // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128
     amount = short_recip_scale[extra_digits];
     C64 = CT.w[1] >> amount;

   } else {
     // coefficient_a*10^(exponent_a-exponent_b) is large
     sign_s = sign_a;

 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
 #ifndef IEEE_ROUND_NEAREST
     rmode = rounding_mode;
     // same mode swap for negative results as in the branch above
     if (sign_s && (unsigned) (rmode - 1) < 2)
       rmode = 3 - rmode;
 #else
     rmode = 0;
 #endif
 #else
     rmode = 0;
 #endif

     // check whether we can take faster path
     scale_ca = estimate_decimal_digits[bin_expon_ca];

     // mask that is all ones when a and b have opposite signs
     sign_ab = sign_a ^ sign_b;
     sign_ab = ((SINT64) sign_ab) >> 63;

     // T1 = 10^(16-diff_dec_expon)
     T1 = power10_table_128[16 - diff_dec_expon].w[0];

     // get number of digits in coefficient_a
     //P_ca = power10_table_128[scale_ca].w[0];
     //P_ca_m1 = power10_table_128[scale_ca-1].w[0];
     if (coefficient_a >= power10_table_128[scale_ca].w[0]) {
       scale_ca++;
       //P_ca_m1 = P_ca;
       //P_ca = power10_table_128[scale_ca].w[0];
     }

     scale_k = 16 - scale_ca;

     // apply sign
     //Ts = (T1 + sign_ab) ^ sign_ab;

     // test range of ca
     //X = coefficient_a + Ts - P_ca_m1;

     // addition
     // T1 is taken out of a here; the matching 10^16 is added to the
     // (signed) b term below, before b is divided by 10^extra_digits
     saved_ca = coefficient_a - T1;
     coefficient_a =
       (SINT64) saved_ca *(SINT64) power10_table_128[scale_k].w[0];
     extra_digits = diff_dec_expon - scale_k;

     // apply sign
     saved_cb = (coefficient_b + sign_ab) ^ sign_ab;
     // add 10^16 and rounding constant
     coefficient_b =
       saved_cb + 10000000000000000ull +
       round_const_table[rmode][extra_digits];

     // get P*(2^M[extra_digits])/10^extra_digits
     __mul_64x64_to_128 (CT, coefficient_b,
 			reciprocals10_64[extra_digits]);

     // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128
     amount = short_recip_scale[extra_digits];
     C0_64 = CT.w[1] >> amount;

     // result coefficient
     C64 = C0_64 + coefficient_a;
     // filter out difficult (corner) cases
     // the following test is equivalent to
     // ( (initial_coefficient_a + Ts) < P_ca &&
     //     (initial_coefficient_a + Ts) > P_ca_m1 ),
     // which ensures the number of digits in coefficient_a does not change
     // after adding (the appropriately scaled and rounded) coefficient_b
     // (single unsigned compare: true when C64 <= 10^15 or C64 >= 10^16)
     if ((UINT64) (C64 - 1000000000000000ull - 1) >
 	9000000000000000ull - 2) {
       if (C64 >= 10000000000000000ull) {
 	// result has more than 16 digits
 	if (!scale_k) {
 	  // must divide coeff_a by 10
 	  saved_ca = saved_ca + T1;
 	  __mul_64x64_to_128 (CA, saved_ca, 0x3333333333333334ull);
 	  //reciprocals10_64[1]);
 	  coefficient_a = CA.w[1] >> 1;
 	  // rem_a = saved_ca - 10*coefficient_a (the dropped digit)
 	  rem_a =
 	    saved_ca - (coefficient_a << 3) - (coefficient_a << 1);
 	  coefficient_a = coefficient_a - T1;

 	  // the dropped digit is carried over into the b term
 	  saved_cb +=
 	    /*90000000000000000 */ +rem_a *
 	    power10_table_128[diff_dec_expon].w[0];
 	} else
 	  coefficient_a =
 	    (SINT64) (saved_ca - T1 -
 		      (T1 << 3)) * (SINT64) power10_table_128[scale_k -
 							      1].w[0];

 	// one more digit has to be rounded off; redo the division with
 	// 10^17 as the offset
 	extra_digits++;
 	coefficient_b =
 	  saved_cb + 100000000000000000ull +
 	  round_const_table[rmode][extra_digits];

 	// get P*(2^M[extra_digits])/10^extra_digits
 	__mul_64x64_to_128 (CT, coefficient_b,
 			    reciprocals10_64[extra_digits]);

 	// now get P/10^extra_digits: shift C64 right by M[extra_digits]-128
 	amount = short_recip_scale[extra_digits];
 	C0_64 = CT.w[1] >> amount;

 	// result coefficient
 	C64 = C0_64 + coefficient_a;
       } else if (C64 <= 1000000000000000ull) {
 	// less than 16 digits in result
 	// retry with a scaled by one more power of 10 and b multiplied
 	// by 10 ((b << 3) + (b << 1)), at an exponent lowered by one
 	coefficient_a =
 	  (SINT64) saved_ca *(SINT64) power10_table_128[scale_k +
 							1].w[0];
 	//extra_digits --;
 	exponent_b--;
 	coefficient_b =
 	  (saved_cb << 3) + (saved_cb << 1) + 100000000000000000ull +
 	  round_const_table[rmode][extra_digits];

 	// get P*(2^M[extra_digits])/10^extra_digits
 	__mul_64x64_to_128 (CT_new, coefficient_b,
 			    reciprocals10_64[extra_digits]);

 	// now get P/10^extra_digits: shift C64 right by M[extra_digits]-128
 	amount = short_recip_scale[extra_digits];
 	C0_64 = CT_new.w[1] >> amount;

 	// result coefficient
 	C64_new = C0_64 + coefficient_a;
 	// keep the retry only if it still fits in 16 digits; otherwise
 	// undo the exponent change and keep the first result
 	if (C64_new < 10000000000000000ull) {
 	  C64 = C64_new;
 #ifdef SET_STATUS_FLAGS
 	  CT = CT_new;
 #endif
 	} else
 	  exponent_b++;
       }

     }

   }

   // midpoint correction: the rounding constant was added before the
   // division, so an exact tie was rounded up; undo that when the
   // quotient is odd
 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
 #ifndef IEEE_ROUND_NEAREST
   if (rmode == 0)	//ROUNDING_TO_NEAREST
 #endif
     if (C64 & 1) {
       // check whether fractional part of initial_P/10^extra_digits
       // is exactly .5
       // this is the same as fractional part of
       //      (initial_P + 0.5*10^extra_digits)/10^extra_digits is exactly zero

       // get remainder
       remainder_h = CT.w[1] << (64 - amount);

       // test whether fractional part is 0
       if (!remainder_h && (CT.w[0] < reciprocals10_64[extra_digits])) {
 	C64--;
       }
     }
 #endif

 #ifdef SET_STATUS_FLAGS
   // assume inexact; each case below switches to EXACT_STATUS when the
   // digits that were rounded off are all zero
   status = INEXACT_EXCEPTION;

   // get remainder
   remainder_h = CT.w[1] << (64 - amount);

   switch (rmode) {
   case ROUNDING_TO_NEAREST:
   case ROUNDING_TIES_AWAY:
     // test whether fractional part is 0
     if ((remainder_h == 0x8000000000000000ull)
 	&& (CT.w[0] < reciprocals10_64[extra_digits]))
       status = EXACT_STATUS;
     break;
   case ROUNDING_DOWN:
   case ROUNDING_TO_ZERO:
     if (!remainder_h && (CT.w[0] < reciprocals10_64[extra_digits]))
       status = EXACT_STATUS;
     break;
   default:
     // round up
     __add_carry_out (tmp, carry, CT.w[0],
 		     reciprocals10_64[extra_digits]);
     if ((remainder_h >> (64 - amount)) + carry >=
 	(((UINT64) 1) << amount))
       status = EXACT_STATUS;
     break;
   }
   __set_status_flags (fpsc, status);

 #endif

   return get_BID64 (sign_s, exponent_b + extra_digits, C64,
 		    rounding_mode, fpsc);
 }


 ///////////////////////////////////////////////////////////////////
 // Drop the extra_digits low decimal digits of the 128-bit
 // coefficient P, rounding according to rounding_mode, and pack the
 // result with get_BID64().
 // Midpoint (tie) cases are not examined here; with SET_STATUS_FLAGS
 // the result is always reported as inexact.
 //////////////////////////////////////////////////////////////////
 static UINT64
 __bid_simple_round64_sticky (UINT64 sign, int exponent, UINT128 P,
 			     int extra_digits, int rounding_mode,
 			     unsigned *fpsc) {
   UINT128 prod_hi, prod_lo, quotient;
   UINT64 coeff;
   int effective_mode = rounding_mode;
   int shift;

 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
 #ifndef IEEE_ROUND_NEAREST
   // negative value: modes 1 and 2 trade places so that the rounding
   // can be carried out on the magnitude
   if (sign && (unsigned) (effective_mode - 1) < 2)
     effective_mode = 3 - effective_mode;
 #endif
 #endif

   // bias P with the rounding constant for this mode / digit count
   __add_128_64 (P, P, round_const_table[effective_mode][extra_digits]);

   // P * (2^M[extra_digits] / 10^extra_digits), full 256-bit product
   __mul_128x128_full (prod_hi, prod_lo, P,
 		      reciprocals10_128[extra_digits]);

   // P / 10^extra_digits = high half shifted right by M[extra_digits]-128
   shift = recip_scale[extra_digits];
   __shr_128 (quotient, prod_hi, shift);

   coeff = __low_64 (quotient);

 #ifdef SET_STATUS_FLAGS
   __set_status_flags (fpsc, INEXACT_EXCEPTION);
 #endif

   return get_BID64 (sign, exponent, coeff, rounding_mode, fpsc);
 }

 ///////////////////////////////////////////////////////////////////
 // round 128-bit coefficient and return result in BID64 format
 //
 //   sign, exponent: sign and decimal exponent that go with P
 //   P:              128-bit coefficient
 //   extra_digits:   number of low decimal digits of P to round off;
 //                   when it is <= 0 (after the adjustment below) only
 //                   the low 64 bits of P are used, unrounded
 //   rounding_mode:  rounding mode; also forwarded to get_BID64()
 //   fpsc:           status word for __set_status_flags()/get_BID64()
 //
 // The exponent handed to get_BID64() is exponent + extra_digits when
 // rounding takes place, and exponent otherwise.
 ///////////////////////////////////////////////////////////////////
 static UINT64
 __bid_full_round64 (UINT64 sign, int exponent, UINT128 P,
 		    int extra_digits, int rounding_mode,
 		    unsigned *fpsc) {
   UINT128 Q_high, Q_low, C128, Stemp, PU;
   UINT64 remainder_h, C64, carry, CY;
   int amount, amount2, rmode, status = 0;

   // negative exponent that rounding off extra_digits digits would not
   // bring up to zero: round off -exponent digits instead, so that the
   // final exponent becomes 0
   if (exponent < 0) {
     if (exponent >= -16 && (extra_digits + exponent < 0)) {
       extra_digits = -exponent;
 #ifdef SET_STATUS_FLAGS
       if (extra_digits > 0) {
 	rmode = rounding_mode;
 	if (sign && (unsigned) (rmode - 1) < 2)
 	  rmode = 3 - rmode;
 	// PU = P plus the rounding constant; underflow is recorded when
 	// PU is below 10^(extra_digits+15), i.e. the rounded coefficient
 	// has fewer than 16 digits
 	__add_128_128 (PU, P,
 		       round_const_table_128[rmode][extra_digits]);
 	if (__unsigned_compare_gt_128
 	    (power10_table_128[extra_digits + 15], PU))
 	  status = UNDERFLOW_EXCEPTION;
       }
 #endif
     }
   }

   if (extra_digits > 0) {
     exponent += extra_digits;
     rmode = rounding_mode;
     // negative value: swap modes 1 and 2 so rounding works on the
     // magnitude
     if (sign && (unsigned) (rmode - 1) < 2)
       rmode = 3 - rmode;
     __add_128_128 (P, P, round_const_table_128[rmode][extra_digits]);

     // get P*(2^M[extra_digits])/10^extra_digits
     __mul_128x128_full (Q_high, Q_low, P,
 			reciprocals10_128[extra_digits]);

     // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
     amount = recip_scale[extra_digits];
     __shr_128_long (C128, Q_high, amount);

     C64 = __low_64 (C128);

     // midpoint correction: an exact tie was rounded up by the rounding
     // constant; step back when the quotient is odd
 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
 #ifndef IEEE_ROUND_NEAREST
     if (rmode == 0)	//ROUNDING_TO_NEAREST
 #endif
       if (C64 & 1) {
 	// check whether fractional part of initial_P/10^extra_digits
 	// is exactly .5

 	// get remainder
 	// (mask of all ones shifted right by 64 - amount, applied to the
 	// low word of Q_high)
 	amount2 = 64 - amount;
 	remainder_h = 0;
 	remainder_h--;
 	remainder_h >>= amount2;
 	remainder_h = remainder_h & Q_high.w[0];

 	// tie: those bits are zero and Q_low is below the reciprocal
 	// (128-bit compare, high word first)
 	if (!remainder_h
 	    && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
 		|| (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
 		    && Q_low.w[0] <
 		    reciprocals10_128[extra_digits].w[0]))) {
 	  C64--;
 	}
       }
 #endif

 #ifdef SET_STATUS_FLAGS
     // assume inexact; the cases below replace status with EXACT_STATUS
     // (which also discards an underflow recorded above) when nothing
     // was lost in rounding
     status |= INEXACT_EXCEPTION;

     // get remainder
     remainder_h = Q_high.w[0] << (64 - amount);

     switch (rmode) {
     case ROUNDING_TO_NEAREST:
     case ROUNDING_TIES_AWAY:
       // test whether fractional part is 0
       if (remainder_h == 0x8000000000000000ull
 	  && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
 	      || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
 		  && Q_low.w[0] <
 		  reciprocals10_128[extra_digits].w[0])))
 	status = EXACT_STATUS;
       break;
     case ROUNDING_DOWN:
     case ROUNDING_TO_ZERO:
       if (!remainder_h
 	  && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
 	      || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
 		  && Q_low.w[0] <
 		  reciprocals10_128[extra_digits].w[0])))
 	status = EXACT_STATUS;
       break;
     default:
       // round up
       __add_carry_out (Stemp.w[0], CY, Q_low.w[0],
 		       reciprocals10_128[extra_digits].w[0]);
       __add_carry_in_out (Stemp.w[1], carry, Q_low.w[1],
 			  reciprocals10_128[extra_digits].w[1], CY);
       if ((remainder_h >> (64 - amount)) + carry >=
 	  (((UINT64) 1) << amount))
 	status = EXACT_STATUS;
     }

     __set_status_flags (fpsc, status);

 #endif
   } else {
     // nothing to round off: take the low 64 bits of P as they are
     C64 = P.w[0];
     // zero result: the sign is cleared, except that rounding down
     // yields a negative zero
     if (!C64) {
       sign = 0;
       if (rounding_mode == ROUNDING_DOWN)
 	sign = 0x8000000000000000ull;
     }
   }
   return get_BID64 (sign, exponent, C64, rounding_mode, fpsc);
 }

 /////////////////////////////////////////////////////////////////////////////////
 // round 192-bit coefficient (P, remainder_P) and return result in BID64 format
 // the lowest 64 bits (remainder_P) are used for midpoint checking only
 //
 //   sign, exponent: sign and decimal exponent that go with P; the
 //                   exponent handed to get_BID64() is
 //                   exponent + extra_digits
 //   P:              upper 128 bits of the coefficient
 //   extra_digits:   number of low decimal digits of P to round off
 //                   (0 means P.w[0] is used as is)
 //   remainder_P:    non-zero when digits below P were discarded
 //   rounding_mode:  rounding mode; also forwarded to get_BID64()
 //   fpsc:           status word for __set_status_flags()/get_BID64()
 //   uf_status:      flags to start from when building the status
 //                   (only used when SET_STATUS_FLAGS is defined)
 ////////////////////////////////////////////////////////////////////////////////
 static UINT64
 __bid_full_round64_remainder (UINT64 sign, int exponent, UINT128 P,
 			      int extra_digits, UINT64 remainder_P,
 			      int rounding_mode, unsigned *fpsc,
 			      unsigned uf_status) {
   UINT128 Q_high, Q_low, C128, Stemp;
   UINT64 remainder_h, C64, carry, CY;
   int amount, amount2, rmode, status = uf_status;

   rmode = rounding_mode;
   // negative value: swap modes 1 and 2 so rounding works on the
   // magnitude
   if (sign && (unsigned) (rmode - 1) < 2)
     rmode = 3 - rmode;
   // rounding up with discarded digits below P: bump P by one unit
   // (128-bit increment with carry into the high word)
   if (rmode == ROUNDING_UP && remainder_P) {
     P.w[0]++;
     if (!P.w[0])
       P.w[1]++;
   }

   if (extra_digits) {
     __add_128_64 (P, P, round_const_table[rmode][extra_digits]);

     // get P*(2^M[extra_digits])/10^extra_digits
     __mul_128x128_full (Q_high, Q_low, P,
 			reciprocals10_128[extra_digits]);

     // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
     amount = recip_scale[extra_digits];
     __shr_128 (C128, Q_high, amount);

     C64 = __low_64 (C128);

     // midpoint correction; a non-zero remainder_P means the value is
     // not an exact tie, so the correction is skipped
 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
 #ifndef IEEE_ROUND_NEAREST
     if (rmode == 0)	//ROUNDING_TO_NEAREST
 #endif
       if (!remainder_P && (C64 & 1)) {
 	// check whether fractional part of initial_P/10^extra_digits
 	// is exactly .5

 	// get remainder
 	// (mask of all ones shifted right by 64 - amount, applied to the
 	// low word of Q_high)
 	amount2 = 64 - amount;
 	remainder_h = 0;
 	remainder_h--;
 	remainder_h >>= amount2;
 	remainder_h = remainder_h & Q_high.w[0];

 	if (!remainder_h
 	    && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
 		|| (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
 		    && Q_low.w[0] <
 		    reciprocals10_128[extra_digits].w[0]))) {
 	  C64--;
 	}
       }
 #endif

 #ifdef SET_STATUS_FLAGS
     // assume inexact on top of uf_status; exactness is only examined
     // when nothing was discarded below P, and EXACT_STATUS then
     // replaces the whole status
     status |= INEXACT_EXCEPTION;

     if (!remainder_P) {
       // get remainder
       remainder_h = Q_high.w[0] << (64 - amount);

       switch (rmode) {
       case ROUNDING_TO_NEAREST:
       case ROUNDING_TIES_AWAY:
 	// test whether fractional part is 0
 	if (remainder_h == 0x8000000000000000ull
 	    && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
 		|| (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
 		    && Q_low.w[0] <
 		    reciprocals10_128[extra_digits].w[0])))
 	  status = EXACT_STATUS;
 	break;
       case ROUNDING_DOWN:
       case ROUNDING_TO_ZERO:
 	if (!remainder_h
 	    && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
 		|| (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
 		    && Q_low.w[0] <
 		    reciprocals10_128[extra_digits].w[0])))
 	  status = EXACT_STATUS;
 	break;
       default:
 	// round up
 	__add_carry_out (Stemp.w[0], CY, Q_low.w[0],
 			 reciprocals10_128[extra_digits].w[0]);
 	__add_carry_in_out (Stemp.w[1], carry, Q_low.w[1],
 			    reciprocals10_128[extra_digits].w[1], CY);
 	if ((remainder_h >> (64 - amount)) + carry >=
 	    (((UINT64) 1) << amount))
 	  status = EXACT_STATUS;
       }
     }
     __set_status_flags (fpsc, status);

 #endif
   } else {
     // no digits of P to round off: use its low 64 bits directly
     C64 = P.w[0];
 #ifdef SET_STATUS_FLAGS
     // digits were still discarded below P, so the result is inexact
     if (remainder_P) {
       __set_status_flags (fpsc, uf_status | INEXACT_EXCEPTION);
     }
 #endif
   }

   return get_BID64 (sign, exponent + extra_digits, C64, rounding_mode,
 		    fpsc);
 }


 ///////////////////////////////////////////////////////////////////
 // Integer quotient P / 10^extra_digits, computed by multiplying
 // with a scaled reciprocal and shifting (no rounding constant is
 // added).  Only the low 64 bits of the quotient are returned, so
 // the quotient is expected to fit in 64 bits.
 ///////////////////////////////////////////////////////////////////
 __BID_INLINE__ UINT64
 __truncate (UINT128 P, int extra_digits)
 // extra_digits <= 16
 {
   UINT128 prod_hi, prod_lo, quotient;
   // M[extra_digits]-128: how far the high half must be shifted right
   int shift = recip_scale[extra_digits];

   // P * (2^M[extra_digits] / 10^extra_digits), full 256-bit product
   __mul_128x128_full (prod_hi, prod_lo, P,
 		      reciprocals10_128[extra_digits]);

   __shr_128 (quotient, prod_hi, shift);

   return __low_64 (quotient);
 }


 ///////////////////////////////////////////////////////////////////
 // return number of decimal digits in 128-bit value X
 //
 // The count is estimated from the binary exponent of the most
 // significant non-zero word (obtained by converting that word to
 // double) and corrected by one comparison with a power of 10.
 //
 // X == 0 is reported as having 1 digit.  It must be handled
 // separately: the exponent field of (double) 0 is 0, which would give
 // bin_expon_cx = -0x3ff and an out-of-range (negative) index into
 // estimate_decimal_digits[].  get_add128() can pass zero here after
 // an exact cancellation.
 ///////////////////////////////////////////////////////////////////
 __BID_INLINE__ int
 __get_dec_digits64 (UINT128 X) {
   int_double tempx;
   int digits_x, bin_expon_cx;

   if (!X.w[1]) {
     // zero has no usable binary exponent; answer without the tables
     if (!X.w[0])
       return 1;

     //--- get number of bits in the coefficients of x and y ---
     tempx.d = (double) X.w[0];
     bin_expon_cx = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff;
     // get number of decimal digits in the coeff_x
     digits_x = estimate_decimal_digits[bin_expon_cx];
     if (X.w[0] >= power10_table_128[digits_x].w[0])
       digits_x++;
     return digits_x;
   }
   // high word is non-zero: use its binary exponent, offset by the
   // 64 bits of the low word
   tempx.d = (double) X.w[1];
   bin_expon_cx = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff;
   // get number of decimal digits in the coeff_x
   digits_x = estimate_decimal_digits[bin_expon_cx + 64];
   if (__unsigned_compare_ge_128 (X, power10_table_128[digits_x]))
     digits_x++;

   return digits_x;
 }


 ////////////////////////////////////////////////////////////////////////////////
 //
 // add 64-bit coefficient to 128-bit coefficient, return result in BID64 format
 //
 ////////////////////////////////////////////////////////////////////////////////
 __BID_INLINE__ UINT64
 get_add128 (UINT64 sign_x, int exponent_x, UINT64 coefficient_x,
 	    UINT64 sign_y, int final_exponent_y, UINT128 CY,
 	    int extra_digits, int rounding_mode, unsigned *fpsc) {
   UINT128 CY_L, CX, FS, F, CT, ST, T2;
   UINT64 CYh, CY0L, T, S, coefficient_y, remainder_y;
   SINT64 D = 0;
   int_double tempx;
   int diff_dec_expon, extra_digits2, exponent_y, status;
   int extra_dx, diff_dec2, bin_expon_cx, digits_x, rmode;

   // CY has more than 16 decimal digits

   exponent_y = final_exponent_y - extra_digits;

 #ifdef IEEE_ROUND_NEAREST_TIES_AWAY
   rounding_mode = 0;
 #endif
 #ifdef IEEE_ROUND_NEAREST
   rounding_mode = 0;
 #endif

   if (exponent_x > exponent_y) {
     // normalize x
     //--- get number of bits in the coefficients of x and y ---
     tempx.d = (double) coefficient_x;
     bin_expon_cx = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff;
     // get number of decimal digits in the coeff_x
     digits_x = estimate_decimal_digits[bin_expon_cx];
     if (coefficient_x >= power10_table_128[digits_x].w[0])
       digits_x++;

     extra_dx = 16 - digits_x;
     coefficient_x *= power10_table_128[extra_dx].w[0];
     if ((sign_x ^ sign_y) && (coefficient_x == 1000000000000000ull)) {
       extra_dx++;
       coefficient_x = 10000000000000000ull;
     }
     exponent_x -= extra_dx;

     if (exponent_x > exponent_y) {

       // exponent_x > exponent_y
       diff_dec_expon = exponent_x - exponent_y;

       if (exponent_x <= final_exponent_y + 1) {
 	__mul_64x64_to_128 (CX, coefficient_x,
 			    power10_table_128[diff_dec_expon].w[0]);

 	if (sign_x == sign_y) {
 	  __add_128_128 (CT, CY, CX);
 	  if ((exponent_x >
 	       final_exponent_y) /*&& (final_exponent_y>0) */ )
 	    extra_digits++;
 	  if (__unsigned_compare_ge_128
 	      (CT, power10_table_128[16 + extra_digits]))
 	    extra_digits++;
 	} else {
 	  __sub_128_128 (CT, CY, CX);
 	  if (((SINT64) CT.w[1]) < 0) {
 	    CT.w[0] = 0 - CT.w[0];
 	    CT.w[1] = 0 - CT.w[1];
 	    if (CT.w[0])
 	      CT.w[1]--;
 	    sign_y = sign_x;
 	  } else if (!(CT.w[1] | CT.w[0])) {
 	    sign_y =
 	      (rounding_mode !=
 	       ROUNDING_DOWN) ? 0 : 0x8000000000000000ull;
 	  }
 	  if ((exponent_x + 1 >=
 	       final_exponent_y) /*&& (final_exponent_y>=0) */ ) {
 	    extra_digits = __get_dec_digits64 (CT) - 16;
 	    if (extra_digits <= 0) {
 	      if (!CT.w[0] && rounding_mode == ROUNDING_DOWN)
 		sign_y = 0x8000000000000000ull;
 	      return get_BID64 (sign_y, exponent_y, CT.w[0],
 				rounding_mode, fpsc);
 	    }
 	  } else
 	    if (__unsigned_compare_gt_128
 		(power10_table_128[15 + extra_digits], CT))
 	    extra_digits--;
 	}

 	return __bid_full_round64 (sign_y, exponent_y, CT, extra_digits,
 				   rounding_mode, fpsc);
       }
       // diff_dec2+extra_digits is the number of digits to eliminate from
       //                           argument CY
       diff_dec2 = exponent_x - final_exponent_y;

       if (diff_dec2 >= 17) {
 #ifndef IEEE_ROUND_NEAREST
 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
 	if ((rounding_mode) & 3) {
 	  switch (rounding_mode) {
 	  case ROUNDING_UP:
 	    if (!sign_y) {
 	      D = ((SINT64) (sign_x ^ sign_y)) >> 63;
 	      D = D + D + 1;
 	      coefficient_x += D;
 	    }
 	    break;
 	  case ROUNDING_DOWN:
 	    if (sign_y) {
 	      D = ((SINT64) (sign_x ^ sign_y)) >> 63;
 	      D = D + D + 1;
 	      coefficient_x += D;
 	    }
 	    break;
 	  case ROUNDING_TO_ZERO:
 	    if (sign_y != sign_x) {
 	      D = 0 - 1;
 	      coefficient_x += D;
 	    }
 	    break;
 	  }
 	  if (coefficient_x < 1000000000000000ull) {
 	    coefficient_x -= D;
 	    coefficient_x =
 	      D + (coefficient_x << 1) + (coefficient_x << 3);
 	    exponent_x--;
 	  }
 	}
 #endif
 #endif
 #ifdef SET_STATUS_FLAGS
 	if (CY.w[1] | CY.w[0])
 	  __set_status_flags (fpsc, INEXACT_EXCEPTION);
 #endif
 	return get_BID64 (sign_x, exponent_x, coefficient_x,
 			  rounding_mode, fpsc);
       }
       // here exponent_x <= 16+final_exponent_y

       // truncate CY to 16 dec. digits
       CYh = __truncate (CY, extra_digits);

       // get remainder
       T = power10_table_128[extra_digits].w[0];
       __mul_64x64_to_64 (CY0L, CYh, T);

       remainder_y = CY.w[0] - CY0L;

       // align coeff_x, CYh
       __mul_64x64_to_128 (CX, coefficient_x,
 			  power10_table_128[diff_dec2].w[0]);

       if (sign_x == sign_y) {
 	__add_128_64 (CT, CX, CYh);
 	if (__unsigned_compare_ge_128
 	    (CT, power10_table_128[16 + diff_dec2]))
 	  diff_dec2++;
       } else {
 	if (remainder_y)
 	  CYh++;
 	__sub_128_64 (CT, CX, CYh);
 	if (__unsigned_compare_gt_128
 	    (power10_table_128[15 + diff_dec2], CT))
 	  diff_dec2--;
       }

       return __bid_full_round64_remainder (sign_x, final_exponent_y, CT,
 					   diff_dec2, remainder_y,
 					   rounding_mode, fpsc, 0);
     }
   }
   // Here (exponent_x <= exponent_y)
   {
     diff_dec_expon = exponent_y - exponent_x;

     if (diff_dec_expon > MAX_FORMAT_DIGITS) {
       rmode = rounding_mode;

       if ((sign_x ^ sign_y)) {
 	if (!CY.w[0])
 	  CY.w[1]--;
 	CY.w[0]--;
 	if (__unsigned_compare_gt_128
 	    (power10_table_128[15 + extra_digits], CY)) {
 	  if (rmode & 3) {
 	    extra_digits--;
 	    final_exponent_y--;
 	  } else {
 	    CY.w[0] = 1000000000000000ull;
 	    CY.w[1] = 0;
 	    extra_digits = 0;
 	  }
 	}
       }
       __scale128_10 (CY, CY);
       extra_digits++;
       CY.w[0] |= 1;

       return __bid_simple_round64_sticky (sign_y, final_exponent_y, CY,
 					  extra_digits, rmode, fpsc);
     }
     // apply sign to coeff_x
     sign_x ^= sign_y;
     sign_x = ((SINT64) sign_x) >> 63;
     CX.w[0] = (coefficient_x + sign_x) ^ sign_x;
     CX.w[1] = sign_x;

     // check whether CY (rounded to 16 digits) and CX have
     //                     any digits in the same position
     diff_dec2 = final_exponent_y - exponent_x;

     if (diff_dec2 <= 17) {
       // align CY to 10^ex
       S = power10_table_128[diff_dec_expon].w[0];
       __mul_64x128_short (CY_L, S, CY);

       __add_128_128 (ST, CY_L, CX);
       extra_digits2 = __get_dec_digits64 (ST) - 16;
       return __bid_full_round64 (sign_y, exponent_x, ST, extra_digits2,
 				 rounding_mode, fpsc);
     }
     // truncate CY to 16 dec. digits
     CYh = __truncate (CY, extra_digits);

     // get remainder
     T = power10_table_128[extra_digits].w[0];
     __mul_64x64_to_64 (CY0L, CYh, T);

     coefficient_y = CY.w[0] - CY0L;
     // add rounding constant
     rmode = rounding_mode;
     if (sign_y && (unsigned) (rmode - 1) < 2)
       rmode = 3 - rmode;
 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
 #ifndef IEEE_ROUND_NEAREST
     if (!(rmode & 3))	//ROUNDING_TO_NEAREST
 #endif
 #endif
     {
       coefficient_y += round_const_table[rmode][extra_digits];
     }
     // align coefficient_y,  coefficient_x
     S = power10_table_128[diff_dec_expon].w[0];
     __mul_64x64_to_128 (F, coefficient_y, S);

     // fraction
     __add_128_128 (FS, F, CX);

 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
 #ifndef IEEE_ROUND_NEAREST
     if (rmode == 0)	//ROUNDING_TO_NEAREST
 #endif
     {
       // rounding code, here RN_EVEN
       // 10^(extra_digits+diff_dec_expon)
       T2 = power10_table_128[diff_dec_expon + extra_digits];
       if (__unsigned_compare_gt_128 (FS, T2)
 	  || ((CYh & 1) && __test_equal_128 (FS, T2))) {
 	CYh++;
 	__sub_128_128 (FS, FS, T2);
       }
     }
 #endif
 #ifndef IEEE_ROUND_NEAREST
 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
     if (rmode == 4)	//ROUNDING_TO_NEAREST
 #endif
     {
       // rounding code, here RN_AWAY
       // 10^(extra_digits+diff_dec_expon)
       T2 = power10_table_128[diff_dec_expon + extra_digits];
       if (__unsigned_compare_ge_128 (FS, T2)) {
 	CYh++;
 	__sub_128_128 (FS, FS, T2);
       }
     }
 #endif
 #ifndef IEEE_ROUND_NEAREST
 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
     switch (rmode) {
     case ROUNDING_DOWN:
     case ROUNDING_TO_ZERO:
       if ((SINT64) FS.w[1] < 0) {
 	CYh--;
 	if (CYh < 1000000000000000ull) {
 	  CYh = 9999999999999999ull;
 	  final_exponent_y--;
 	}
       } else {
 	T2 = power10_table_128[diff_dec_expon + extra_digits];
 	if (__unsigned_compare_ge_128 (FS, T2)) {
 	  CYh++;
 	  __sub_128_128 (FS, FS, T2);
 	}
       }
       break;
     case ROUNDING_UP:
       if ((SINT64) FS.w[1] < 0)
 	break;
       T2 = power10_table_128[diff_dec_expon + extra_digits];
       if (__unsigned_compare_gt_128 (FS, T2)) {
 	CYh += 2;
 	__sub_128_128 (FS, FS, T2);
       } else if ((FS.w[1] == T2.w[1]) && (FS.w[0] == T2.w[0])) {
 	CYh++;
 	FS.w[1] = FS.w[0] = 0;
       } else if (FS.w[1] | FS.w[0])
 	CYh++;
       break;
     }
 #endif
 #endif

 #ifdef SET_STATUS_FLAGS
     status = INEXACT_EXCEPTION;
 #ifndef IEEE_ROUND_NEAREST
 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
     if (!(rmode & 3))
 #endif
 #endif
     {
       // RN modes
       if ((FS.w[1] ==
 	   round_const_table_128[0][diff_dec_expon + extra_digits].w[1])
 	  && (FS.w[0] ==
 	      round_const_table_128[0][diff_dec_expon +
 				       extra_digits].w[0]))
 	status = EXACT_STATUS;
     }
 #ifndef IEEE_ROUND_NEAREST
 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
     else if (!FS.w[1] && !FS.w[0])
       status = EXACT_STATUS;
 #endif
 #endif

     __set_status_flags (fpsc, status);
 #endif

     return get_BID64 (sign_y, final_exponent_y, CYh, rounding_mode,
 		      fpsc);
   }

 }

 //////////////////////////////////////////////////////////////////////////
 //
 //  If coefficient_z is less than 16 digits long, normalize to 16 digits
 //
 /////////////////////////////////////////////////////////////////////////
 // Scale a coefficient that has fewer than 16 decimal digits up to 16
 // digits (lowering the exponent to match, but never below 0), apply the
 // pending directed-rounding correction, and pack the result with
 // get_BID64.
 //
 //   sign_z        sign of the value; tested for non-zero and XOR-ed with
 //                 round_dir (only the top bit of that XOR is examined)
 //   exponent_z    decimal exponent paired with coefficient_z
 //                 (NOTE(review): the table index below assumes it is
 //                 non-negative on entry - confirm against callers)
 //   coefficient_z coefficient to normalize; must be non-zero, since its
 //                 binary exponent is used as a table index
 //   round_dir     direction of the earlier rounding error; only the sign
 //                 of (round_dir ^ sign_z) is used
 //   round_flag    non-zero when the value passed in is inexact
 //   rounding_mode rounding mode, forwarded to get_BID64
 //   fpsc          status word updated via __set_status_flags
 static UINT64
 BID_normalize (UINT64 sign_z, int exponent_z,
 	       UINT64 coefficient_z, UINT64 round_dir, int round_flag,
 	       int rounding_mode, unsigned *fpsc) {
   int_double tempx;
   int digits_z, bin_expon, scale;
 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
 #ifndef IEEE_ROUND_NEAREST
   SINT64 D;
   int rmode;

   // For a negative value, swap modes 1 and 2 so the code below can
   // reason about the magnitude only.
   rmode = rounding_mode;
   if (sign_z && (unsigned) (rmode - 1) < 2)
     rmode = 3 - rmode;
 #else
   // Round-to-nearest-only build: a coefficient that already reaches
   // power10_table_128[15] needs no scaling.  The original code returned
   // an undeclared identifier here; return the packed value instead and
   // raise the same INEXACT flag the general path raises for this input.
   if (coefficient_z >= power10_table_128[15].w[0]) {
 #ifdef SET_STATUS_FLAGS
     if (round_flag) {
       __set_status_flags (fpsc, INEXACT_EXCEPTION);
     }
 #endif
     return get_BID64 (sign_z, exponent_z, coefficient_z, rounding_mode,
                       fpsc);
   }
 #endif
 #endif

   //--- get the binary exponent of the coefficient via a double ---
   tempx.d = (double) coefficient_z;
   bin_expon = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff;
   // estimate the number of decimal digits, then correct by one if needed
   digits_z = estimate_decimal_digits[bin_expon];
   if (coefficient_z >= power10_table_128[digits_z].w[0])
     digits_z++;

   // scale up to 16 digits, but do not let the exponent drop below 0
   scale = 16 - digits_z;
   exponent_z -= scale;
   if (exponent_z < 0) {
     scale += exponent_z;
     exponent_z = 0;
   }
   coefficient_z *= power10_table_128[scale].w[0];

 #ifdef SET_STATUS_FLAGS
   if (round_flag) {
     __set_status_flags (fpsc, INEXACT_EXCEPTION);
     if (coefficient_z < 1000000000000000ull)
       __set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
 #ifndef IEEE_ROUND_NEAREST
     // Directed rounding only: the coefficient sits exactly on
     // 1000000000000000 at exponent 0 and the correction below will
     // step it downward.  rmode exists only in this configuration, so
     // the clause is compiled only here.
     else if ((coefficient_z == 1000000000000000ull) && !exponent_z
              && ((SINT64) (round_dir ^ sign_z) < 0)
              && (rmode == ROUNDING_DOWN || rmode == ROUNDING_TO_ZERO))
       __set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
 #endif
 #endif
   }
 #endif

 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
 #ifndef IEEE_ROUND_NEAREST
   // Directed modes (rmode & 3 != 0): adjust the last digit according to
   // the sign of (round_dir ^ sign_z).
   if (round_flag && (rmode & 3)) {
     D = (SINT64) (round_dir ^ sign_z);

     if (rmode == ROUNDING_UP) {
       if (D >= 0)
         coefficient_z++;
     } else {
       if (D < 0)
         coefficient_z--;
       // dropped below 16 digits: borrow from the exponent when possible
       if (coefficient_z < 1000000000000000ull && exponent_z) {
         coefficient_z = 9999999999999999ull;
         exponent_z--;
       }
     }
   }
 #endif
 #endif

   return get_BID64 (sign_z, exponent_z, coefficient_z, rounding_mode,
                     fpsc);
 }


 //////////////////////////////////////////////////////////////////////////
 //
 //    0*10^ey + cz*10^ez,   ey<ez
 //
 //////////////////////////////////////////////////////////////////////////

 // Pack cz*10^ez after adding a zero whose exponent is ey: the
 // coefficient is scaled up by a power of ten (and the exponent lowered
 // by the same amount), limited both by the room left up to 16 decimal
 // digits and by the exponent gap ez - ey.
 __BID_INLINE__ UINT64
 add_zero64 (int exponent_y, UINT64 sign_z, int exponent_z,
 	    UINT64 coefficient_z, unsigned *prounding_mode,
 	    unsigned *fpsc) {
   int_double cz_as_double;
   int binary_exp, digits_cz, headroom, exp_gap, shift;

   // binary exponent of cz, read from its double representation
   cz_as_double.d = (double) coefficient_z;
   binary_exp = ((cz_as_double.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff;

   // decimal digit count: table estimate, bumped by one when cz reaches
   // the table's threshold for that estimate
   digits_cz = estimate_decimal_digits[binary_exp];
   if (coefficient_z >= power10_table_128[digits_cz].w[0])
     digits_cz += 1;

   // number of decimal places to shift: the smaller of the digit headroom
   // and the exponent difference
   headroom = 16 - digits_cz;
   exp_gap = exponent_z - exponent_y;
   shift = (exp_gap < headroom) ? exp_gap : headroom;

   coefficient_z *= power10_table_128[shift].w[0];
   exponent_z -= shift;

   return get_BID64 (sign_z, exponent_z, coefficient_z,
                     *prounding_mode, fpsc);
 }
 #endif