gcc-4.3.1/libgcc/config/libbid/bid128_fma.c - toolchain/gcc - Git at Google

 /* Copyright (C) 2007  Free Software Foundation, Inc.

 This file is part of GCC.

 GCC is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free
 Software Foundation; either version 2, or (at your option) any later
 version.

 In addition to the permissions in the GNU General Public License, the
 Free Software Foundation gives you unlimited permission to link the
 compiled version of this file into combinations with other programs,
 and to distribute those combinations without any restriction coming
 from the use of this file.  (The General Public License restrictions
 do apply in other respects; for example, they cover modification of
 the file, and distribution when not linked into a combine
 executable.)

 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
 WARRANTY; without even the implied warranty of MERCHANTABILITY or
 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 for more details.

 You should have received a copy of the GNU General Public License
 along with GCC; see the file COPYING.  If not, write to the Free
 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
 02110-1301, USA.  */

 /*****************************************************************************
  *
  *  BID128 fma   x * y + z
  *
  ****************************************************************************/

 #include "bid_internal.h"

 static void
 rounding_correction (unsigned int rnd_mode,
         	     unsigned int is_inexact_lt_midpoint,
         	     unsigned int is_inexact_gt_midpoint,
         	     unsigned int is_midpoint_lt_even,
         	     unsigned int is_midpoint_gt_even,
         	     int unbexp,
         	     UINT128 * ptrres, _IDEC_flags * ptrfpsf) {
   // unbiased true exponent unbexp may be larger than emax

   UINT128 res = *ptrres; // expected to have the correct sign and coefficient
   // (the exponent field is ignored, as unbexp is used instead)
   UINT64 sign, exp;
   UINT64 C_hi, C_lo;

   // general correction from RN to RA, RM, RP, RZ
   // Note: if the result is negative, then is_inexact_lt_midpoint,
   // is_inexact_gt_midpoint, is_midpoint_lt_even, and is_midpoint_gt_even
   // have to be considered as if determined for the absolute value of the
   // result (so they seem to be reversed)

   if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
       is_midpoint_lt_even || is_midpoint_gt_even) {
     *ptrfpsf |= INEXACT_EXCEPTION;
   }
   // apply correction to result calculated with unbounded exponent
   sign = res.w[1] & MASK_SIGN;
   exp = (UINT64) (unbexp + 6176) << 49; // valid only if expmin<=unbexp<=expmax
   C_hi = res.w[1] & MASK_COEFF;
   C_lo = res.w[0];
   if ((!sign && ((rnd_mode == ROUNDING_UP && is_inexact_lt_midpoint) ||
       ((rnd_mode == ROUNDING_TIES_AWAY || rnd_mode == ROUNDING_UP) &&
       is_midpoint_gt_even))) ||
       (sign && ((rnd_mode == ROUNDING_DOWN && is_inexact_lt_midpoint) ||
       ((rnd_mode == ROUNDING_TIES_AWAY || rnd_mode == ROUNDING_DOWN) &&
       is_midpoint_gt_even)))) {
     // C = C + 1
     C_lo = C_lo + 1;
     if (C_lo == 0)
       C_hi = C_hi + 1;
     if (C_hi == 0x0001ed09bead87c0ull && C_lo == 0x378d8e6400000000ull) {
       // C = 10^34 => rounding overflow
       C_hi = 0x0000314dc6448d93ull;
       C_lo = 0x38c15b0a00000000ull; // 10^33
       // exp = exp + EXP_P1;
       unbexp = unbexp + 1;
       exp = (UINT64) (unbexp + 6176) << 49;
     }
   } else if ((is_midpoint_lt_even || is_inexact_gt_midpoint) &&
       ((sign && (rnd_mode == ROUNDING_UP || rnd_mode == ROUNDING_TO_ZERO)) ||
       (!sign && (rnd_mode == ROUNDING_DOWN || rnd_mode == ROUNDING_TO_ZERO)))) {
     // C = C - 1
     C_lo = C_lo - 1;
     if (C_lo == 0xffffffffffffffffull)
       C_hi--;
     // check if we crossed into the lower decade
     if (C_hi == 0x0000314dc6448d93ull && C_lo == 0x38c15b09ffffffffull) {
       // C = 10^33 - 1
       if (exp > 0) {
         C_hi = 0x0001ed09bead87c0ull; // 10^34 - 1
         C_lo = 0x378d8e63ffffffffull;
         // exp = exp - EXP_P1;
         unbexp = unbexp - 1;
         exp = (UINT64) (unbexp + 6176) << 49;
       } else { // if exp = 0
         if (is_midpoint_lt_even || is_midpoint_lt_even ||
             is_inexact_gt_midpoint || is_inexact_gt_midpoint) // tiny & inexact
           *ptrfpsf |= UNDERFLOW_EXCEPTION;
       }
     }
   } else {
     ; // the result is already correct
   }
   if (unbexp > expmax) { // 6111
     *ptrfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
     exp = 0;
     if (!sign) { // result is positive
       if (rnd_mode == ROUNDING_UP || rnd_mode == ROUNDING_TIES_AWAY) { // +inf
         C_hi = 0x7800000000000000ull;
         C_lo = 0x0000000000000000ull;
       } else { // res = +MAXFP = (10^34-1) * 10^emax
         C_hi = 0x5fffed09bead87c0ull;
         C_lo = 0x378d8e63ffffffffull;
       }
     } else { // result is negative
       if (rnd_mode == ROUNDING_DOWN || rnd_mode == ROUNDING_TIES_AWAY) { // -inf
         C_hi = 0xf800000000000000ull;
         C_lo = 0x0000000000000000ull;
       } else { // res = -MAXFP = -(10^34-1) * 10^emax
         C_hi = 0xdfffed09bead87c0ull;
         C_lo = 0x378d8e63ffffffffull;
       }
     }
   }
   // assemble the result
   res.w[1] = sign | exp | C_hi;
   res.w[0] = C_lo;
   *ptrres = res;
 }

 static void
 add256 (UINT256 x, UINT256 y, UINT256 * pz) {
   // *z = x + yl assume the sum fits in 256 bits
   UINT256 z;
   z.w[0] = x.w[0] + y.w[0];
   if (z.w[0] < x.w[0]) {
     x.w[1]++;
     if (x.w[1] == 0x0000000000000000ull) {
       x.w[2]++;
       if (x.w[2] == 0x0000000000000000ull) {
         x.w[3]++;
       }
     }
   }
   z.w[1] = x.w[1] + y.w[1];
   if (z.w[1] < x.w[1]) {
     x.w[2]++;
     if (x.w[2] == 0x0000000000000000ull) {
       x.w[3]++;
     }
   }
   z.w[2] = x.w[2] + y.w[2];
   if (z.w[2] < x.w[2]) {
     x.w[3]++;
   }
   z.w[3] = x.w[3] + y.w[3]; // it was assumed that no carry is possible
   *pz = z;
 }

 static void
 sub256 (UINT256 x, UINT256 y, UINT256 * pz) {
   // *z = x - y; assume x >= y
   UINT256 z;
   z.w[0] = x.w[0] - y.w[0];
   if (z.w[0] > x.w[0]) {
     x.w[1]--;
     if (x.w[1] == 0xffffffffffffffffull) {
       x.w[2]--;
       if (x.w[2] == 0xffffffffffffffffull) {
         x.w[3]--;
       }
     }
   }
   z.w[1] = x.w[1] - y.w[1];
   if (z.w[1] > x.w[1]) {
     x.w[2]--;
     if (x.w[2] == 0xffffffffffffffffull) {
       x.w[3]--;
     }
   }
   z.w[2] = x.w[2] - y.w[2];
   if (z.w[2] > x.w[2]) {
     x.w[3]--;
   }
   z.w[3] = x.w[3] - y.w[3]; // no borrow possible, because x >= y
   *pz = z;
 }


 static int
 nr_digits256 (UINT256 R256) {
   int ind;
   // determine the number of decimal digits in R256
   if (R256.w[3] == 0x0 && R256.w[2] == 0x0 && R256.w[1] == 0x0) {
     // between 1 and 19 digits
     for (ind = 1; ind <= 19; ind++) {
       if (R256.w[0] < ten2k64[ind]) {
         break;
       }
     }
     // ind digits
   } else if (R256.w[3] == 0x0 && R256.w[2] == 0x0 &&
              (R256.w[1] < ten2k128[0].w[1] ||
               (R256.w[1] == ten2k128[0].w[1]
                && R256.w[0] < ten2k128[0].w[0]))) {
     // 20 digits
     ind = 20;
   } else if (R256.w[3] == 0x0 && R256.w[2] == 0x0) {
     // between 21 and 38 digits
     for (ind = 1; ind <= 18; ind++) {
       if (R256.w[1] < ten2k128[ind].w[1] ||
           (R256.w[1] == ten2k128[ind].w[1] &&
            R256.w[0] < ten2k128[ind].w[0])) {
         break;
       }
     }
     // ind + 20 digits
     ind = ind + 20;
   } else if (R256.w[3] == 0x0 &&
              (R256.w[2] < ten2k256[0].w[2] ||
               (R256.w[2] == ten2k256[0].w[2] &&
                R256.w[1] < ten2k256[0].w[1]) ||
               (R256.w[2] == ten2k256[0].w[2] &&
                R256.w[1] == ten2k256[0].w[1] &&
                R256.w[0] < ten2k256[0].w[0]))) {
     // 39 digits
     ind = 39;
   } else {
     // between 40 and 68 digits
     for (ind = 1; ind <= 29; ind++) {
       if (R256.w[3] < ten2k256[ind].w[3] ||
           (R256.w[3] == ten2k256[ind].w[3] &&
            R256.w[2] < ten2k256[ind].w[2]) ||
           (R256.w[3] == ten2k256[ind].w[3] &&
            R256.w[2] == ten2k256[ind].w[2] &&
            R256.w[1] < ten2k256[ind].w[1]) ||
           (R256.w[3] == ten2k256[ind].w[3] &&
            R256.w[2] == ten2k256[ind].w[2] &&
            R256.w[1] == ten2k256[ind].w[1] &&
            R256.w[0] < ten2k256[ind].w[0])) {
         break;
       }
     }
     // ind + 39 digits
     ind = ind + 39;
   }
   return (ind);
 }

 // add/subtract C4 and C3 * 10^scale; this may follow a previous rounding, so
 // use the rounding information from ptr_is_* to avoid a double rounding error
 static void
 add_and_round (int q3,
                int q4,
                int e4,
                int delta,
                int p34,
                UINT64 z_sign,
                UINT64 p_sign,
                UINT128 C3,
                UINT256 C4,
                int rnd_mode,
                int *ptr_is_midpoint_lt_even,
                int *ptr_is_midpoint_gt_even,
                int *ptr_is_inexact_lt_midpoint,
                int *ptr_is_inexact_gt_midpoint,
                _IDEC_flags * ptrfpsf, UINT128 * ptrres) {

   int scale;
   int x0;
   int ind;
   UINT64 R64;
   UINT128 P128, R128;
   UINT192 P192, R192;
   UINT256 R256;
   int is_midpoint_lt_even = 0;
   int is_midpoint_gt_even = 0;
   int is_inexact_lt_midpoint = 0;
   int is_inexact_gt_midpoint = 0;
   int is_midpoint_lt_even0 = 0;
   int is_midpoint_gt_even0 = 0;
   int is_inexact_lt_midpoint0 = 0;
   int is_inexact_gt_midpoint0 = 0;
   int incr_exp = 0;
   int is_tiny = 0;
   int lt_half_ulp = 0;
   int eq_half_ulp = 0;
   // int gt_half_ulp = 0;
   UINT128 res = *ptrres;

   // scale C3 up by 10^(q4-delta-q3), 0 <= q4-delta-q3 <= 2*P34-2 = 66
   scale = q4 - delta - q3; // 0 <= scale <= 66 (or 0 <= scale <= 68 if this
   // comes from Cases (2), (3), (4), (5), (6), with 0 <= |delta| <= 1

   // calculate C3 * 10^scale in R256 (it has at most 67 decimal digits for
   // Cases (15),(16),(17) and at most 69 for Cases (2),(3),(4),(5),(6))
   if (scale == 0) {
     R256.w[3] = 0x0ull;
     R256.w[2] = 0x0ull;
     R256.w[1] = C3.w[1];
     R256.w[0] = C3.w[0];
   } else if (scale <= 19) { // 10^scale fits in 64 bits
     P128.w[1] = 0;
     P128.w[0] = ten2k64[scale];
     __mul_128x128_to_256 (R256, P128, C3);
   } else if (scale <= 38) { // 10^scale fits in 128 bits
     __mul_128x128_to_256 (R256, ten2k128[scale - 20], C3);
   } else if (scale <= 57) { // 39 <= scale <= 57
     // 10^scale fits in 192 bits but C3 * 10^scale fits in 223 or 230 bits
     // (10^67 has 223 bits; 10^69 has 230 bits);
     // must split the computation:
     // 10^scale * C3 = 10*38 * 10^(scale-38) * C3 where 10^38 takes 127
     // bits and so 10^(scale-38) * C3 fits in 128 bits with certainty
     // Note that 1 <= scale - 38 <= 19 => 10^(scale-38) fits in 64 bits
     __mul_64x128_to_128 (R128, ten2k64[scale - 38], C3);
     // now multiply R128 by 10^38
     __mul_128x128_to_256 (R256, R128, ten2k128[18]);
   } else { // 58 <= scale <= 66
     // 10^scale takes between 193 and 220 bits,
     // and C3 * 10^scale fits in 223 bits (10^67/10^69 has 223/230 bits)
     // must split the computation:
     // 10^scale * C3 = 10*38 * 10^(scale-38) * C3 where 10^38 takes 127
     // bits and so 10^(scale-38) * C3 fits in 128 bits with certainty
     // Note that 20 <= scale - 38 <= 30 => 10^(scale-38) fits in 128 bits
     // Calculate first 10^(scale-38) * C3, which fits in 128 bits; because
     // 10^(scale-38) takes more than 64 bits, C3 will take less than 64
     __mul_64x128_to_128 (R128, C3.w[0], ten2k128[scale - 58]);
     // now calculate 10*38 * 10^(scale-38) * C3
     __mul_128x128_to_256 (R256, R128, ten2k128[18]);
   }
   // C3 * 10^scale is now in R256

   // for Cases (15), (16), (17) C4 > C3 * 10^scale because C4 has at least
   // one extra digit; for Cases (2), (3), (4), (5), or (6) any order is
   // possible
   // add/subtract C4 and C3 * 10^scale; the exponent is e4
   if (p_sign == z_sign) { // R256 = C4 + R256
     // calculate R256 = C4 + C3 * 10^scale = C4 + R256 which is exact,
     // but may require rounding
     add256 (C4, R256, &R256);
   } else { // if (p_sign != z_sign) { // R256 = C4 - R256
     // calculate R256 = C4 - C3 * 10^scale = C4 - R256 or
     // R256 = C3 * 10^scale - C4 = R256 - C4 which is exact,
     // but may require rounding

     // compare first R256 = C3 * 10^scale and C4
     if (R256.w[3] > C4.w[3] || (R256.w[3] == C4.w[3] && R256.w[2] > C4.w[2]) ||
         (R256.w[3] == C4.w[3] && R256.w[2] == C4.w[2] && R256.w[1] > C4.w[1]) ||
         (R256.w[3] == C4.w[3] && R256.w[2] == C4.w[2] && R256.w[1] == C4.w[1] &&
         R256.w[0] >= C4.w[0])) { // C3 * 10^scale >= C4
       // calculate R256 = C3 * 10^scale - C4 = R256 - C4, which is exact,
       // but may require rounding
       sub256 (R256, C4, &R256);
       // flip p_sign too, because the result has the sign of z
       p_sign = z_sign;
     } else { // if C4 > C3 * 10^scale
       // calculate R256 = C4 - C3 * 10^scale = C4 - R256, which is exact,
       // but may require rounding
       sub256 (C4, R256, &R256);
     }
     // if the result is pure zero, the sign depends on the rounding mode
     // (x*y and z had opposite signs)
     if (R256.w[3] == 0x0ull && R256.w[2] == 0x0ull &&
         R256.w[1] == 0x0ull && R256.w[0] == 0x0ull) {
       if (rnd_mode != ROUNDING_DOWN)
         p_sign = 0x0000000000000000ull;
       else
         p_sign = 0x8000000000000000ull;
       // the exponent is max (e4, expmin)
       if (e4 < -6176)
         e4 = expmin;
       // assemble result
       res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49);
       res.w[0] = 0x0;
       *ptrres = res;
       return;
     }
   }

   // determine the number of decimal digits in R256
   ind = nr_digits256 (R256);

   // the exact result is (-1)^p_sign * R256 * 10^e4 where q (R256) = ind;
   // round to the destination precision, with unbounded exponent

   if (ind <= p34) {
     // result rounded to the destination precision with unbounded exponent
     // is exact
     if (ind + e4 < p34 + expmin) {
       is_tiny = 1; // applies to all rounding modes
     }
     res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | R256.w[1];
     res.w[0] = R256.w[0];
     // Note: res is correct only if expmin <= e4 <= expmax
   } else { // if (ind > p34)
     // if more than P digits, round to nearest to P digits
     // round R256 to p34 digits
     x0 = ind - p34; // 1 <= x0 <= 34 as 35 <= ind <= 68
     if (ind <= 38) {
       P128.w[1] = R256.w[1];
       P128.w[0] = R256.w[0];
       round128_19_38 (ind, x0, P128, &R128, &incr_exp,
         	      &is_midpoint_lt_even, &is_midpoint_gt_even,
         	      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
     } else if (ind <= 57) {
       P192.w[2] = R256.w[2];
       P192.w[1] = R256.w[1];
       P192.w[0] = R256.w[0];
       round192_39_57 (ind, x0, P192, &R192, &incr_exp,
         	      &is_midpoint_lt_even, &is_midpoint_gt_even,
         	      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
       R128.w[1] = R192.w[1];
       R128.w[0] = R192.w[0];
     } else { // if (ind <= 68)
       round256_58_76 (ind, x0, R256, &R256, &incr_exp,
         	      &is_midpoint_lt_even, &is_midpoint_gt_even,
         	      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
       R128.w[1] = R256.w[1];
       R128.w[0] = R256.w[0];
     }
     // the rounded result has p34 = 34 digits
     e4 = e4 + x0 + incr_exp;
     if (rnd_mode == ROUNDING_TO_NEAREST) {
       if (e4 < expmin) {
         is_tiny = 1; // for other rounding modes apply correction
       }
     } else {
       // for RM, RP, RZ, RA apply correction in order to determine tininess
       // but do not save the result; apply the correction to
       // (-1)^p_sign * significand * 10^0
       P128.w[1] = p_sign | 0x3040000000000000ull | R128.w[1];
       P128.w[0] = R128.w[0];
       rounding_correction (rnd_mode,
         		   is_inexact_lt_midpoint,
         		   is_inexact_gt_midpoint, is_midpoint_lt_even,
         		   is_midpoint_gt_even, 0, &P128, ptrfpsf);
       scale = ((P128.w[1] & MASK_EXP) >> 49) - 6176; // -1, 0, or +1
       // the number of digits in the significand is p34 = 34
       if (e4 + scale < expmin) {
         is_tiny = 1;
       }
     }
     ind = p34; // the number of decimal digits in the signifcand of res
     res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | R128.w[1]; // RN
     res.w[0] = R128.w[0];
     // Note: res is correct only if expmin <= e4 <= expmax
     // set the inexact flag after rounding with bounded exponent, if any
   }
   // at this point we have the result rounded with unbounded exponent in
   // res and we know its tininess:
   // res = (-1)^p_sign * significand * 10^e4,
   // where q (significand) = ind <= p34
   // Note: res is correct only if expmin <= e4 <= expmax

   // check for overflow if RN
   if (rnd_mode == ROUNDING_TO_NEAREST && (ind + e4) > (p34 + expmax)) {
     res.w[1] = p_sign | 0x7800000000000000ull;
     res.w[0] = 0x0000000000000000ull;
     *ptrres = res;
     *ptrfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
     return; // BID_RETURN (res)
   } // else not overflow or not RN, so continue

   // if (e4 >= expmin) we have the result rounded with bounded exponent
   if (e4 < expmin) {
     x0 = expmin - e4; // x0 >= 1; the number of digits to chop off of res
     // where the result rounded [at most] once is
     //   (-1)^p_sign * significand_res * 10^e4

     // avoid double rounding error
     is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
     is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
     is_midpoint_lt_even0 = is_midpoint_lt_even;
     is_midpoint_gt_even0 = is_midpoint_gt_even;
     is_inexact_lt_midpoint = 0;
     is_inexact_gt_midpoint = 0;
     is_midpoint_lt_even = 0;
     is_midpoint_gt_even = 0;

     if (x0 > ind) {
       // nothing is left of res when moving the decimal point left x0 digits
       is_inexact_lt_midpoint = 1;
       res.w[1] = p_sign | 0x0000000000000000ull;
       res.w[0] = 0x0000000000000000ull;
       e4 = expmin;
     } else if (x0 == ind) { // 1 <= x0 = ind <= p34 = 34
       // this is <, =, or > 1/2 ulp
       // compare the ind-digit value in the significand of res with
       // 1/2 ulp = 5*10^(ind-1), i.e. determine whether it is
       // less than, equal to, or greater than 1/2 ulp (significand of res)
       R128.w[1] = res.w[1] & MASK_COEFF;
       R128.w[0] = res.w[0];
       if (ind <= 19) {
         if (R128.w[0] < midpoint64[ind - 1]) { // < 1/2 ulp
           lt_half_ulp = 1;
           is_inexact_lt_midpoint = 1;
         } else if (R128.w[0] == midpoint64[ind - 1]) { // = 1/2 ulp
           eq_half_ulp = 1;
           is_midpoint_gt_even = 1;
         } else { // > 1/2 ulp
           // gt_half_ulp = 1;
           is_inexact_gt_midpoint = 1;
         }
       } else { // if (ind <= 38) {
         if (R128.w[1] < midpoint128[ind - 20].w[1] ||
             (R128.w[1] == midpoint128[ind - 20].w[1] &&
             R128.w[0] < midpoint128[ind - 20].w[0])) { // < 1/2 ulp
           lt_half_ulp = 1;
           is_inexact_lt_midpoint = 1;
         } else if (R128.w[1] == midpoint128[ind - 20].w[1] &&
             R128.w[0] == midpoint128[ind - 20].w[0]) { // = 1/2 ulp
           eq_half_ulp = 1;
           is_midpoint_gt_even = 1;
         } else { // > 1/2 ulp
           // gt_half_ulp = 1;
           is_inexact_gt_midpoint = 1;
         }
       }
       if (lt_half_ulp || eq_half_ulp) {
         // res = +0.0 * 10^expmin
         res.w[1] = 0x0000000000000000ull;
         res.w[0] = 0x0000000000000000ull;
       } else { // if (gt_half_ulp)
         // res = +1 * 10^expmin
         res.w[1] = 0x0000000000000000ull;
         res.w[0] = 0x0000000000000001ull;
       }
       res.w[1] = p_sign | res.w[1];
       e4 = expmin;
     } else { // if (1 <= x0 <= ind - 1 <= 33)
       // round the ind-digit result to ind - x0 digits

       if (ind <= 18) { // 2 <= ind <= 18
         round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp,
         	      &is_midpoint_lt_even, &is_midpoint_gt_even,
         	      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
         res.w[1] = 0x0;
         res.w[0] = R64;
       } else if (ind <= 38) {
         P128.w[1] = res.w[1] & MASK_COEFF;
         P128.w[0] = res.w[0];
         round128_19_38 (ind, x0, P128, &res, &incr_exp,
         		&is_midpoint_lt_even, &is_midpoint_gt_even,
         		&is_inexact_lt_midpoint,
         		&is_inexact_gt_midpoint);
       }
       e4 = e4 + x0; // expmin
       // we want the exponent to be expmin, so if incr_exp = 1 then
       // multiply the rounded result by 10 - it will still fit in 113 bits
       if (incr_exp) {
         // 64 x 128 -> 128
         P128.w[1] = res.w[1] & MASK_COEFF;
         P128.w[0] = res.w[0];
         __mul_64x128_to_128 (res, ten2k64[1], P128);
       }
       res.w[1] =
         p_sign | ((UINT64) (e4 + 6176) << 49) | (res.w[1] & MASK_COEFF);
       // avoid a double rounding error
       if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
           is_midpoint_lt_even) { // double rounding error upward
         // res = res - 1
         res.w[0]--;
         if (res.w[0] == 0xffffffffffffffffull)
           res.w[1]--;
         // Note: a double rounding error upward is not possible; for this
         // the result after the first rounding would have to be 99...95
         // (35 digits in all), possibly followed by a number of zeros; this
         // is not possible in Cases (2)-(6) or (15)-(17) which may get here
         is_midpoint_lt_even = 0;
         is_inexact_lt_midpoint = 1;
       } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
           is_midpoint_gt_even) { // double rounding error downward
         // res = res + 1
         res.w[0]++;
         if (res.w[0] == 0)
           res.w[1]++;
         is_midpoint_gt_even = 0;
         is_inexact_gt_midpoint = 1;
       } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
         	 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
         // if this second rounding was exact the result may still be
         // inexact because of the first rounding
         if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
           is_inexact_gt_midpoint = 1;
         }
         if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
           is_inexact_lt_midpoint = 1;
         }
       } else if (is_midpoint_gt_even &&
         	 (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
         // pulled up to a midpoint
         is_inexact_lt_midpoint = 1;
         is_inexact_gt_midpoint = 0;
         is_midpoint_lt_even = 0;
         is_midpoint_gt_even = 0;
       } else if (is_midpoint_lt_even &&
         	 (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
         // pulled down to a midpoint
         is_inexact_lt_midpoint = 0;
         is_inexact_gt_midpoint = 1;
         is_midpoint_lt_even = 0;
         is_midpoint_gt_even = 0;
       } else {
         ;
       }
     }
   }
   // res contains the correct result
   // apply correction if not rounding to nearest
   if (rnd_mode != ROUNDING_TO_NEAREST) {
     rounding_correction (rnd_mode,
         		 is_inexact_lt_midpoint, is_inexact_gt_midpoint,
         		 is_midpoint_lt_even, is_midpoint_gt_even,
         		 e4, &res, ptrfpsf);
   }
   if (is_midpoint_lt_even || is_midpoint_gt_even ||
       is_inexact_lt_midpoint || is_inexact_gt_midpoint) {
     // set the inexact flag
     *ptrfpsf |= INEXACT_EXCEPTION;
     if (is_tiny)
       *ptrfpsf |= UNDERFLOW_EXCEPTION;
   }

   *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
   *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
   *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
   *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
   *ptrres = res;
   return;
 }


 #if DECIMAL_CALL_BY_REFERENCE
 static void
 bid128_ext_fma (int *ptr_is_midpoint_lt_even,
         	int *ptr_is_midpoint_gt_even,
         	int *ptr_is_inexact_lt_midpoint,
         	int *ptr_is_inexact_gt_midpoint, UINT128 * pres,
         	UINT128 * px, UINT128 * py,
         	UINT128 *
         	pz _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
         	_EXC_INFO_PARAM) {
   UINT128 x = *px, y = *py, z = *pz;
 #if !DECIMAL_GLOBAL_ROUNDING
   unsigned int rnd_mode = *prnd_mode;
 #endif
 #else
 static UINT128
 bid128_ext_fma (int *ptr_is_midpoint_lt_even,
         	int *ptr_is_midpoint_gt_even,
         	int *ptr_is_inexact_lt_midpoint,
         	int *ptr_is_inexact_gt_midpoint, UINT128 x, UINT128 y,
         	UINT128 z _RND_MODE_PARAM _EXC_FLAGS_PARAM
         	_EXC_MASKS_PARAM _EXC_INFO_PARAM) {
 #endif

   UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
   UINT64 x_sign, y_sign, z_sign, p_sign, tmp_sign;
   UINT64 x_exp = 0, y_exp = 0, z_exp = 0, p_exp;
   int true_p_exp;
   UINT128 C1, C2, C3;
   UINT256 C4;
   int q1 = 0, q2 = 0, q3 = 0, q4;
   int e1, e2, e3, e4;
   int scale, ind, delta, x0;
   int p34 = P34; // used to modify the limit on the number of digits
   BID_UI64DOUBLE tmp;
   int x_nr_bits, y_nr_bits, z_nr_bits;
   unsigned int save_fpsf;
   int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0;
   int is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
   int is_midpoint_lt_even0 = 0, is_midpoint_gt_even0 = 0;
   int is_inexact_lt_midpoint0 = 0, is_inexact_gt_midpoint0 = 0;
   int incr_exp = 0;
   int lsb;
   int lt_half_ulp = 0;
   int eq_half_ulp = 0;
   int gt_half_ulp = 0;
   int is_tiny = 0;
   UINT64 R64, tmp64;
   UINT128 P128, R128;
   UINT192 P192, R192;
   UINT256 R256;

   // the following are based on the table of special cases for fma; the NaN
   // behavior is similar to that of the IA-64 Architecture fma

   // identify cases where at least one operand is NaN

   BID_SWAP128 (x);
   BID_SWAP128 (y);
   BID_SWAP128 (z);
   if ((y.w[1] & MASK_NAN) == MASK_NAN) { // y is NAN
     // if x = {0, f, inf, NaN}, y = NaN, z = {0, f, inf, NaN} then res = Q (y)
     // check first for non-canonical NaN payload
     if (((y.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
         (((y.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
          (y.w[0] > 0x38c15b09ffffffffull))) {
       y.w[1] = y.w[1] & 0xffffc00000000000ull;
       y.w[0] = 0x0ull;
     }
     if ((y.w[1] & MASK_SNAN) == MASK_SNAN) { // y is SNAN
       // set invalid flag
       *pfpsf |= INVALID_EXCEPTION;
       // return quiet (y)
       res.w[1] = y.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
       res.w[0] = y.w[0];
     } else { // y is QNaN
       // return y
       res.w[1] = y.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
       res.w[0] = y.w[0];
       // if z = SNaN or x = SNaN signal invalid exception
       if ((z.w[1] & MASK_SNAN) == MASK_SNAN ||
           (x.w[1] & MASK_SNAN) == MASK_SNAN) {
         // set invalid flag
         *pfpsf |= INVALID_EXCEPTION;
       }
     }
     *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
     *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
     *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
     *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
     BID_SWAP128 (res);
     BID_RETURN (res)
   } else if ((z.w[1] & MASK_NAN) == MASK_NAN) { // z is NAN
     // if x = {0, f, inf, NaN}, y = {0, f, inf}, z = NaN then res = Q (z)
     // check first for non-canonical NaN payload
     if (((z.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
         (((z.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
          (z.w[0] > 0x38c15b09ffffffffull))) {
       z.w[1] = z.w[1] & 0xffffc00000000000ull;
       z.w[0] = 0x0ull;
     }
     if ((z.w[1] & MASK_SNAN) == MASK_SNAN) { // z is SNAN
       // set invalid flag
       *pfpsf |= INVALID_EXCEPTION;
       // return quiet (z)
       res.w[1] = z.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
       res.w[0] = z.w[0];
     } else { // z is QNaN
       // return z
       res.w[1] = z.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
       res.w[0] = z.w[0];
       // if x = SNaN signal invalid exception
       if ((x.w[1] & MASK_SNAN) == MASK_SNAN) {
         // set invalid flag
         *pfpsf |= INVALID_EXCEPTION;
       }
     }
     *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
     *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
     *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
     *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
     BID_SWAP128 (res);
     BID_RETURN (res)
   } else if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN
     // if x = NaN, y = {0, f, inf}, z = {0, f, inf} then res = Q (x)
     // check first for non-canonical NaN payload
     if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
         (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
          (x.w[0] > 0x38c15b09ffffffffull))) {
       x.w[1] = x.w[1] & 0xffffc00000000000ull;
       x.w[0] = 0x0ull;
     }
     if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN
       // set invalid flag
       *pfpsf |= INVALID_EXCEPTION;
       // return quiet (x)
       res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
       res.w[0] = x.w[0];
     } else { // x is QNaN
       // return x
       res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
       res.w[0] = x.w[0];
     }
     *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
     *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
     *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
     *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
     BID_SWAP128 (res);
     BID_RETURN (res)
   }
   // x, y, z are 0, f, or inf but not NaN => unpack the arguments and check
   // for non-canonical values

   x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
   C1.w[1] = x.w[1] & MASK_COEFF;
   C1.w[0] = x.w[0];
   if ((x.w[1] & MASK_ANY_INF) != MASK_INF) { // x != inf
     // if x is not infinity check for non-canonical values - treated as zero
     if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11
       // non-canonical
       x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
       C1.w[1] = 0; // significand high
       C1.w[0] = 0; // significand low
     } else { // G0_G1 != 11
       x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits
       if (C1.w[1] > 0x0001ed09bead87c0ull ||
           (C1.w[1] == 0x0001ed09bead87c0ull &&
            C1.w[0] > 0x378d8e63ffffffffull)) {
         // x is non-canonical if coefficient is larger than 10^34 -1
         C1.w[1] = 0;
         C1.w[0] = 0;
       } else { // canonical
         ;
       }
     }
   }
   y_sign = y.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
   C2.w[1] = y.w[1] & MASK_COEFF;
   C2.w[0] = y.w[0];
   if ((y.w[1] & MASK_ANY_INF) != MASK_INF) { // y != inf
     // if y is not infinity check for non-canonical values - treated as zero
     if ((y.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11
       // non-canonical
       y_exp = (y.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
       C2.w[1] = 0; // significand high
       C2.w[0] = 0; // significand low
     } else { // G0_G1 != 11
       y_exp = y.w[1] & MASK_EXP; // biased and shifted left 49 bits
       if (C2.w[1] > 0x0001ed09bead87c0ull ||
           (C2.w[1] == 0x0001ed09bead87c0ull &&
            C2.w[0] > 0x378d8e63ffffffffull)) {
         // y is non-canonical if coefficient is larger than 10^34 -1
         C2.w[1] = 0;
         C2.w[0] = 0;
       } else { // canonical
         ;
       }
     }
   }
   z_sign = z.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
   C3.w[1] = z.w[1] & MASK_COEFF;
   C3.w[0] = z.w[0];
   if ((z.w[1] & MASK_ANY_INF) != MASK_INF) { // z != inf
     // if z is not infinity check for non-canonical values - treated as zero
     if ((z.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11
       // non-canonical
       z_exp = (z.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
       C3.w[1] = 0; // significand high
       C3.w[0] = 0; // significand low
     } else { // G0_G1 != 11
       z_exp = z.w[1] & MASK_EXP; // biased and shifted left 49 bits
       if (C3.w[1] > 0x0001ed09bead87c0ull ||
           (C3.w[1] == 0x0001ed09bead87c0ull &&
            C3.w[0] > 0x378d8e63ffffffffull)) {
         // z is non-canonical if coefficient is larger than 10^34 -1
         C3.w[1] = 0;
         C3.w[0] = 0;
       } else { // canonical
         ;
       }
     }
   }

   p_sign = x_sign ^ y_sign; // sign of the product

   // identify cases where at least one operand is infinity

   if ((x.w[1] & MASK_ANY_INF) == MASK_INF) { // x = inf
     if ((y.w[1] & MASK_ANY_INF) == MASK_INF) { // y = inf
       if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf
         if (p_sign == z_sign) {
           res.w[1] = z_sign | MASK_INF;
           res.w[0] = 0x0;
         } else {
           // return QNaN Indefinite
           res.w[1] = 0x7c00000000000000ull;
           res.w[0] = 0x0000000000000000ull;
           // set invalid flag
           *pfpsf |= INVALID_EXCEPTION;
         }
       } else { // z = 0 or z = f
         res.w[1] = p_sign | MASK_INF;
         res.w[0] = 0x0;
       }
     } else if (C2.w[1] != 0 || C2.w[0] != 0) { // y = f
       if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf
         if (p_sign == z_sign) {
           res.w[1] = z_sign | MASK_INF;
           res.w[0] = 0x0;
         } else {
           // return QNaN Indefinite
           res.w[1] = 0x7c00000000000000ull;
           res.w[0] = 0x0000000000000000ull;
           // set invalid flag
           *pfpsf |= INVALID_EXCEPTION;
         }
       } else { // z = 0 or z = f
         res.w[1] = p_sign | MASK_INF;
         res.w[0] = 0x0;
       }
     } else { // y = 0
       // return QNaN Indefinite
       res.w[1] = 0x7c00000000000000ull;
       res.w[0] = 0x0000000000000000ull;
       // set invalid flag
       *pfpsf |= INVALID_EXCEPTION;
     }
     *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
     *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
     *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
     *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
     BID_SWAP128 (res);
     BID_RETURN (res)
   } else if ((y.w[1] & MASK_ANY_INF) == MASK_INF) { // y = inf
     if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf
       // x = f, necessarily
       if ((p_sign != z_sign)
           || (C1.w[1] == 0x0ull && C1.w[0] == 0x0ull)) {
         // return QNaN Indefinite
         res.w[1] = 0x7c00000000000000ull;
         res.w[0] = 0x0000000000000000ull;
         // set invalid flag
         *pfpsf |= INVALID_EXCEPTION;
       } else {
         res.w[1] = z_sign | MASK_INF;
         res.w[0] = 0x0;
       }
     } else if (C1.w[1] == 0x0 && C1.w[0] == 0x0) { // x = 0
       // z = 0, f, inf
       // return QNaN Indefinite
       res.w[1] = 0x7c00000000000000ull;
       res.w[0] = 0x0000000000000000ull;
       // set invalid flag
       *pfpsf |= INVALID_EXCEPTION;
     } else {
       // x = f and z = 0, f, necessarily
       res.w[1] = p_sign | MASK_INF;
       res.w[0] = 0x0;
     }
     *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
     *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
     *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
     *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
     BID_SWAP128 (res);
     BID_RETURN (res)
   } else if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf
     // x = 0, f and y = 0, f, necessarily
     res.w[1] = z_sign | MASK_INF;
     res.w[0] = 0x0;
     *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
     *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
     *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
     *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
     BID_SWAP128 (res);
     BID_RETURN (res)
   }

   true_p_exp = (x_exp >> 49) - 6176 + (y_exp >> 49) - 6176;
   if (true_p_exp < -6176)
     p_exp = 0; // cannot be less than EXP_MIN
   else
     p_exp = (UINT64) (true_p_exp + 6176) << 49;

   if (((C1.w[1] == 0x0 && C1.w[0] == 0x0) || (C2.w[1] == 0x0 && C2.w[0] == 0x0)) && C3.w[1] == 0x0 && C3.w[0] == 0x0) { // (x = 0 or y = 0) and z = 0
     // the result is 0
     if (p_exp < z_exp)
       res.w[1] = p_exp; // preferred exponent
     else
       res.w[1] = z_exp; // preferred exponent
     if (p_sign == z_sign) {
       res.w[1] |= z_sign;
       res.w[0] = 0x0;
     } else { // x * y and z have opposite signs
       if (rnd_mode == ROUNDING_DOWN) {
         // res = -0.0
         res.w[1] |= MASK_SIGN;
         res.w[0] = 0x0;
       } else {
         // res = +0.0
         // res.w[1] |= 0x0;
         res.w[0] = 0x0;
       }
     }
     *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
     *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
     *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
     *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
     BID_SWAP128 (res);
     BID_RETURN (res)
   }
   // from this point on, we may need to know the number of decimal digits
   // in the significands of x, y, z when x, y, z != 0

   if (C1.w[1] != 0 || C1.w[0] != 0) { // x = f (non-zero finite)
     // q1 = nr. of decimal digits in x
     // determine first the nr. of bits in x
     if (C1.w[1] == 0) {
       if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53
         // split the 64-bit value in two 32-bit halves to avoid rounding errors
         if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32
           tmp.d = (double) (C1.w[0] >> 32); // exact conversion
           x_nr_bits =
             33 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
         } else { // x < 2^32
           tmp.d = (double) (C1.w[0]); // exact conversion
           x_nr_bits =
             1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
         }
       } else { // if x < 2^53
         tmp.d = (double) C1.w[0]; // exact conversion
         x_nr_bits =
           1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
       }
     } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
       tmp.d = (double) C1.w[1]; // exact conversion
       x_nr_bits =
         65 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
     }
     q1 = nr_digits[x_nr_bits - 1].digits;
     if (q1 == 0) {
       q1 = nr_digits[x_nr_bits - 1].digits1;
       if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi ||
           (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi &&
            C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo))
         q1++;
     }
   }

   if (C2.w[1] != 0 || C2.w[0] != 0) { // y = f (non-zero finite)
     if (C2.w[1] == 0) {
       if (C2.w[0] >= 0x0020000000000000ull) { // y >= 2^53
         // split the 64-bit value in two 32-bit halves to avoid rounding errors
         if (C2.w[0] >= 0x0000000100000000ull) { // y >= 2^32
           tmp.d = (double) (C2.w[0] >> 32); // exact conversion
           y_nr_bits =
             32 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
         } else { // y < 2^32
           tmp.d = (double) C2.w[0]; // exact conversion
           y_nr_bits =
             ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
         }
       } else { // if y < 2^53
         tmp.d = (double) C2.w[0]; // exact conversion
         y_nr_bits =
           ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
       }
     } else { // C2.w[1] != 0 => nr. bits = 64 + nr_bits (C2.w[1])
       tmp.d = (double) C2.w[1]; // exact conversion
       y_nr_bits =
         64 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
     }

     q2 = nr_digits[y_nr_bits].digits;
     if (q2 == 0) {
       q2 = nr_digits[y_nr_bits].digits1;
       if (C2.w[1] > nr_digits[y_nr_bits].threshold_hi ||
           (C2.w[1] == nr_digits[y_nr_bits].threshold_hi &&
            C2.w[0] >= nr_digits[y_nr_bits].threshold_lo))
         q2++;
     }
   }

   if (C3.w[1] != 0 || C3.w[0] != 0) { // z = f (non-zero finite)
     if (C3.w[1] == 0) {
       if (C3.w[0] >= 0x0020000000000000ull) { // z >= 2^53
         // split the 64-bit value in two 32-bit halves to avoid rounding errors
         if (C3.w[0] >= 0x0000000100000000ull) { // z >= 2^32
           tmp.d = (double) (C3.w[0] >> 32); // exact conversion
           z_nr_bits =
             32 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
         } else { // z < 2^32
           tmp.d = (double) C3.w[0]; // exact conversion
           z_nr_bits =
             ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
         }
       } else { // if z < 2^53
         tmp.d = (double) C3.w[0]; // exact conversion
         z_nr_bits =
           ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
       }
     } else { // C3.w[1] != 0 => nr. bits = 64 + nr_bits (C3.w[1])
       tmp.d = (double) C3.w[1]; // exact conversion
       z_nr_bits =
         64 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
     }

     q3 = nr_digits[z_nr_bits].digits;
     if (q3 == 0) {
       q3 = nr_digits[z_nr_bits].digits1;
       if (C3.w[1] > nr_digits[z_nr_bits].threshold_hi ||
           (C3.w[1] == nr_digits[z_nr_bits].threshold_hi &&
            C3.w[0] >= nr_digits[z_nr_bits].threshold_lo))
         q3++;
     }
   }

   if ((C1.w[1] == 0x0 && C1.w[0] == 0x0) ||
       (C2.w[1] == 0x0 && C2.w[0] == 0x0)) {
     // x = 0 or y = 0
     // z = f, necessarily; for 0 + z return z, with the preferred exponent
     // the result is z, but need to get the preferred exponent
     if (z_exp <= p_exp) { // the preferred exponent is z_exp
       res.w[1] = z_sign | (z_exp & MASK_EXP) | C3.w[1];
       res.w[0] = C3.w[0];
     } else { // if (p_exp < z_exp) the preferred exponent is p_exp
       // return (C3 * 10^scale) * 10^(z_exp - scale)
       // where scale = min (p34-q3, (z_exp-p_exp) >> 49)
       scale = p34 - q3;
       ind = (z_exp - p_exp) >> 49;
       if (ind < scale)
         scale = ind;
       if (scale == 0) {
         res.w[1] = z.w[1]; // & MASK_COEFF, which is redundant
         res.w[0] = z.w[0];
       } else if (q3 <= 19) { // z fits in 64 bits
         if (scale <= 19) { // 10^scale fits in 64 bits
           // 64 x 64 C3.w[0] * ten2k64[scale]
           __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
         } else { // 10^scale fits in 128 bits
           // 64 x 128 C3.w[0] * ten2k128[scale - 20]
           __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
         }
       } else { // z fits in 128 bits, but 10^scale must fit in 64 bits
         // 64 x 128 ten2k64[scale] * C3
         __mul_128x64_to_128 (res, ten2k64[scale], C3);
       }
       // subtract scale from the exponent
       z_exp = z_exp - ((UINT64) scale << 49);
       res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
     }
     *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
     *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
     *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
     *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
     BID_SWAP128 (res);
     BID_RETURN (res)
   } else {
     ; // continue with x = f, y = f, z = 0 or x = f, y = f, z = f
   }

   e1 = (x_exp >> 49) - 6176; // unbiased exponent of x
   e2 = (y_exp >> 49) - 6176; // unbiased exponent of y
   e3 = (z_exp >> 49) - 6176; // unbiased exponent of z
   e4 = e1 + e2; // unbiased exponent of the exact x * y

   // calculate C1 * C2 and its number of decimal digits, q4

   // the exact product has either q1 + q2 - 1 or q1 + q2 decimal digits
   // where 2 <= q1 + q2 <= 68
   // calculate C4 = C1 * C2 and determine q
   C4.w[3] = C4.w[2] = C4.w[1] = C4.w[0] = 0;
   if (q1 + q2 <= 19) { // if 2 <= q1 + q2 <= 19, C4 = C1 * C2 fits in 64 bits
     C4.w[0] = C1.w[0] * C2.w[0];
     // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
     if (C4.w[0] < ten2k64[q1 + q2 - 1])
       q4 = q1 + q2 - 1; // q4 in [1, 18]
     else
       q4 = q1 + q2; // q4 in [2, 19]
     // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
   } else if (q1 + q2 == 20) { // C4 = C1 * C2 fits in 64 or 128 bits
     // q1 <= 19 and q2 <= 19 so both C1 and C2 fit in 64 bits
     __mul_64x64_to_128MACH (C4, C1.w[0], C2.w[0]);
     // if C4 < 10^(q1+q2-1) = 10^19 then q4 = q1+q2-1 = 19 else q4 = q1+q2 = 20
     if (C4.w[1] == 0 && C4.w[0] < ten2k64[19]) { // 19 = q1+q2-1
       // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
       q4 = 19; // 19 = q1 + q2 - 1
     } else {
       // if (C4.w[1] == 0)
       //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
       // else
       //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
       q4 = 20; // 20 = q1 + q2
     }
   } else if (q1 + q2 <= 38) { // 21 <= q1 + q2 <= 38
     // C4 = C1 * C2 fits in 64 or 128 bits
     // (64 bits possibly, but only when q1 + q2 = 21 and C4 has 20 digits)
     // at least one of C1, C2 has at most 19 decimal digits & fits in 64 bits
     if (q1 <= 19) {
       __mul_128x64_to_128 (C4, C1.w[0], C2);
     } else { // q2 <= 19
       __mul_128x64_to_128 (C4, C2.w[0], C1);
     }
     // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
     if (C4.w[1] < ten2k128[q1 + q2 - 21].w[1] ||
         (C4.w[1] == ten2k128[q1 + q2 - 21].w[1] &&
          C4.w[0] < ten2k128[q1 + q2 - 21].w[0])) {
       // if (C4.w[1] == 0) // q4 = 20, necessarily
       //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
       // else
       //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
       q4 = q1 + q2 - 1; // q4 in [20, 37]
     } else {
       // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
       q4 = q1 + q2; // q4 in [21, 38]
     }
   } else if (q1 + q2 == 39) { // C4 = C1 * C2 fits in 128 or 192 bits
     // both C1 and C2 fit in 128 bits (actually in 113 bits)
     // may replace this by 128x128_to192
     __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] is 0
     // if C4 < 10^(q1+q2-1) = 10^38 then q4 = q1+q2-1 = 38 else q4 = q1+q2 = 39
     if (C4.w[2] == 0 && (C4.w[1] < ten2k128[18].w[1] ||
         		 (C4.w[1] == ten2k128[18].w[1]
         		  && C4.w[0] < ten2k128[18].w[0]))) {
       // 18 = 38 - 20 = q1+q2-1 - 20
       // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
       q4 = 38; // 38 = q1 + q2 - 1
     } else {
       // if (C4.w[2] == 0)
       // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
       // else
       //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
       q4 = 39; // 39 = q1 + q2
     }
   } else if (q1 + q2 <= 57) { // 40 <= q1 + q2 <= 57
     // C4 = C1 * C2 fits in 128 or 192 bits
     // (128 bits possibly, but only when q1 + q2 = 40 and C4 has 39 digits)
     // both C1 and C2 fit in 128 bits (actually in 113 bits); at most one
     // may fit in 64 bits
     if (C1.w[1] == 0) { // C1 fits in 64 bits
       // __mul_64x128_full (REShi64, RESlo128, A64, B128)
       __mul_64x128_full (C4.w[2], C4, C1.w[0], C2);
     } else if (C2.w[1] == 0) { // C2 fits in 64 bits
       // __mul_64x128_full (REShi64, RESlo128, A64, B128)
       __mul_64x128_full (C4.w[2], C4, C2.w[0], C1);
     } else { // both C1 and C2 require 128 bits
       // may use __mul_128x128_to_192 (C4.w[2], C4.w[0], C2.w[0], C1);
       __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] = 0
     }
     // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
     if (C4.w[2] < ten2k256[q1 + q2 - 40].w[2] ||
         (C4.w[2] == ten2k256[q1 + q2 - 40].w[2] &&
          (C4.w[1] < ten2k256[q1 + q2 - 40].w[1] ||
           (C4.w[1] == ten2k256[q1 + q2 - 40].w[1] &&
            C4.w[0] < ten2k256[q1 + q2 - 40].w[0])))) {
       // if (C4.w[2] == 0) // q4 = 39, necessarily
       //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
       // else
       //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
       q4 = q1 + q2 - 1; // q4 in [39, 56]
     } else {
       // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
       q4 = q1 + q2; // q4 in [40, 57]
     }
   } else if (q1 + q2 == 58) { // C4 = C1 * C2 fits in 192 or 256 bits
     // both C1 and C2 fit in 128 bits (actually in 113 bits); at most one
     // may fit in 64 bits
     if (C1.w[1] == 0) { // C1 * C2 will fit in 192 bits
       __mul_64x128_full (C4.w[2], C4, C1.w[0], C2); // may use 64x128_to_192
     } else if (C2.w[1] == 0) { // C1 * C2 will fit in 192 bits
       __mul_64x128_full (C4.w[2], C4, C2.w[0], C1); // may use 64x128_to_192
     } else { // C1 * C2 will fit in 192 bits or in 256 bits
       __mul_128x128_to_256 (C4, C1, C2);
     }
     // if C4 < 10^(q1+q2-1) = 10^57 then q4 = q1+q2-1 = 57 else q4 = q1+q2 = 58
     if (C4.w[3] == 0 && (C4.w[2] < ten2k256[18].w[2] ||
         		 (C4.w[2] == ten2k256[18].w[2]
         		  && (C4.w[1] < ten2k256[18].w[1]
         		      || (C4.w[1] == ten2k256[18].w[1]
         			  && C4.w[0] < ten2k256[18].w[0]))))) {
       // 18 = 57 - 39 = q1+q2-1 - 39
       // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
       q4 = 57; // 57 = q1 + q2 - 1
     } else {
       // if (C4.w[3] == 0)
       //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
       // else
       //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 256;
       q4 = 58; // 58 = q1 + q2
     }
   } else { // if 59 <= q1 + q2 <= 68
     // C4 = C1 * C2 fits in 192 or 256 bits
     // (192 bits possibly, but only when q1 + q2 = 59 and C4 has 58 digits)
     // both C1 and C2 fit in 128 bits (actually in 113 bits); none fits in
     // 64 bits
     // may use __mul_128x128_to_192 (C4.w[2], C4.w[0], C2.w[0], C1);
     __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] = 0
     // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
     if (C4.w[3] < ten2k256[q1 + q2 - 40].w[3] ||
         (C4.w[3] == ten2k256[q1 + q2 - 40].w[3] &&
          (C4.w[2] < ten2k256[q1 + q2 - 40].w[2] ||
           (C4.w[2] == ten2k256[q1 + q2 - 40].w[2] &&
            (C4.w[1] < ten2k256[q1 + q2 - 40].w[1] ||
             (C4.w[1] == ten2k256[q1 + q2 - 40].w[1] &&
              C4.w[0] < ten2k256[q1 + q2 - 40].w[0])))))) {
       // if (C4.w[3] == 0) // q4 = 58, necessarily
       //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
       // else
       //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 256;
       q4 = q1 + q2 - 1; // q4 in [58, 67]
     } else {
       // length of C1 * C2 rounded up to a multiple of 64 bits is len = 256;
       q4 = q1 + q2; // q4 in [59, 68]
     }
   }

   if (C3.w[1] == 0x0 && C3.w[0] == 0x0) { // x = f, y = f, z = 0
     save_fpsf = *pfpsf; // sticky bits - caller value must be preserved
     *pfpsf = 0;

     if (q4 > p34) {

       // truncate C4 to p34 digits into res
       // x = q4-p34, 1 <= x <= 34 because 35 <= q4 <= 68
       x0 = q4 - p34;
       if (q4 <= 38) {
         P128.w[1] = C4.w[1];
         P128.w[0] = C4.w[0];
         round128_19_38 (q4, x0, P128, &res, &incr_exp,
         		&is_midpoint_lt_even, &is_midpoint_gt_even,
         		&is_inexact_lt_midpoint,
         		&is_inexact_gt_midpoint);
       } else if (q4 <= 57) { // 35 <= q4 <= 57
         P192.w[2] = C4.w[2];
         P192.w[1] = C4.w[1];
         P192.w[0] = C4.w[0];
         round192_39_57 (q4, x0, P192, &R192, &incr_exp,
         		&is_midpoint_lt_even, &is_midpoint_gt_even,
         		&is_inexact_lt_midpoint,
         		&is_inexact_gt_midpoint);
         res.w[0] = R192.w[0];
         res.w[1] = R192.w[1];
       } else { // if (q4 <= 68)
         round256_58_76 (q4, x0, C4, &R256, &incr_exp,
         		&is_midpoint_lt_even, &is_midpoint_gt_even,
         		&is_inexact_lt_midpoint,
         		&is_inexact_gt_midpoint);
         res.w[0] = R256.w[0];
         res.w[1] = R256.w[1];
       }
       e4 = e4 + x0;
       if (incr_exp) {
         e4 = e4 + 1;
       }
       q4 = p34;
       // res is now the coefficient of the result rounded to the destination
       // precision, with unbounded exponent; the exponent is e4; q4=digits(res)
     } else { // if (q4 <= p34)
       // C4 * 10^e4 is the result rounded to the destination precision, with
       // unbounded exponent (which is exact)

       if ((q4 + e4 <= p34 + expmax) && (e4 > expmax)) {
         // e4 is too large, but can be brought within range by scaling up C4
         scale = e4 - expmax; // 1 <= scale < P-q4 <= P-1 => 1 <= scale <= P-2
         // res = (C4 * 10^scale) * 10^expmax
         if (q4 <= 19) { // C4 fits in 64 bits
           if (scale <= 19) { // 10^scale fits in 64 bits
             // 64 x 64 C4.w[0] * ten2k64[scale]
             __mul_64x64_to_128MACH (res, C4.w[0], ten2k64[scale]);
           } else { // 10^scale fits in 128 bits
             // 64 x 128 C4.w[0] * ten2k128[scale - 20]
             __mul_128x64_to_128 (res, C4.w[0], ten2k128[scale - 20]);
           }
         } else { // C4 fits in 128 bits, but 10^scale must fit in 64 bits
           // 64 x 128 ten2k64[scale] * CC43
           __mul_128x64_to_128 (res, ten2k64[scale], C4);
         }
         e4 = e4 - scale; // expmax
         q4 = q4 + scale;
       } else {
         res.w[1] = C4.w[1];
         res.w[0] = C4.w[0];
       }
       // res is the coefficient of the result rounded to the destination
       // precision, with unbounded exponent (it has q4 digits); the exponent
       // is e4 (exact result)
     }

     // check for overflow
     if (q4 + e4 > p34 + expmax) {
       if (rnd_mode == ROUNDING_TO_NEAREST) {
         res.w[1] = p_sign | 0x7800000000000000ull; // +/-inf
         res.w[0] = 0x0000000000000000ull;
         *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
       } else {
         res.w[1] = p_sign | res.w[1];
         rounding_correction (rnd_mode,
         		     is_inexact_lt_midpoint,
         		     is_inexact_gt_midpoint,
         		     is_midpoint_lt_even, is_midpoint_gt_even,
         		     e4, &res, pfpsf);
       }
       *pfpsf |= save_fpsf;
       *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
       *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
       *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
       *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
       BID_SWAP128 (res);
       BID_RETURN (res)
     }
     // check for underflow
     if (q4 + e4 < expmin + P34) {
       is_tiny = 1; // the result is tiny
       if (e4 < expmin) {
         // if e4 < expmin, we must truncate more of res
         x0 = expmin - e4; // x0 >= 1
         is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
         is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
         is_midpoint_lt_even0 = is_midpoint_lt_even;
         is_midpoint_gt_even0 = is_midpoint_gt_even;
         is_inexact_lt_midpoint = 0;
         is_inexact_gt_midpoint = 0;
         is_midpoint_lt_even = 0;
         is_midpoint_gt_even = 0;
         // the number of decimal digits in res is q4
         if (x0 < q4) { // 1 <= x0 <= q4-1 => round res to q4 - x0 digits
           if (q4 <= 18) { // 2 <= q4 <= 18, 1 <= x0 <= 17
             round64_2_18 (q4, x0, res.w[0], &R64, &incr_exp,
         		  &is_midpoint_lt_even, &is_midpoint_gt_even,
         		  &is_inexact_lt_midpoint,
         		  &is_inexact_gt_midpoint);
             if (incr_exp) {
               // R64 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 17
               R64 = ten2k64[q4 - x0];
             }
             // res.w[1] = 0; (from above)
             res.w[0] = R64;
           } else { // if (q4 <= 34)
             // 19 <= q4 <= 38
             P128.w[1] = res.w[1];
             P128.w[0] = res.w[0];
             round128_19_38 (q4, x0, P128, &res, &incr_exp,
         		    &is_midpoint_lt_even, &is_midpoint_gt_even,
         		    &is_inexact_lt_midpoint,
         		    &is_inexact_gt_midpoint);
             if (incr_exp) {
               // increase coefficient by a factor of 10; this will be <= 10^33
               // R128 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 37
               if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19
         	// res.w[1] = 0;
         	res.w[0] = ten2k64[q4 - x0];
               } else { // 20 <= q4 - x0 <= 37
         	res.w[0] = ten2k128[q4 - x0 - 20].w[0];
         	res.w[1] = ten2k128[q4 - x0 - 20].w[1];
               }
             }
           }
           e4 = e4 + x0; // expmin
         } else if (x0 == q4) {
           // the second rounding is for 0.d(0)d(1)...d(q4-1) * 10^emin
           // determine relationship with 1/2 ulp
           if (q4 <= 19) {
             if (res.w[0] < midpoint64[q4 - 1]) { // < 1/2 ulp
               lt_half_ulp = 1;
               is_inexact_lt_midpoint = 1;
             } else if (res.w[0] == midpoint64[q4 - 1]) { // = 1/2 ulp
               eq_half_ulp = 1;
               is_midpoint_gt_even = 1;
             } else { // > 1/2 ulp
               // gt_half_ulp = 1;
               is_inexact_gt_midpoint = 1;
             }
           } else { // if (q4 <= 34)
             if (res.w[1] < midpoint128[q4 - 20].w[1] ||
                 (res.w[1] == midpoint128[q4 - 20].w[1] &&
                 res.w[0] < midpoint128[q4 - 20].w[0])) { // < 1/2 ulp
               lt_half_ulp = 1;
               is_inexact_lt_midpoint = 1;
             } else if (res.w[1] == midpoint128[q4 - 20].w[1] &&
                 res.w[0] == midpoint128[q4 - 20].w[0]) { // = 1/2 ulp
               eq_half_ulp = 1;
               is_midpoint_gt_even = 1;
             } else { // > 1/2 ulp
               // gt_half_ulp = 1;
               is_inexact_gt_midpoint = 1;
             }
           }
           if (lt_half_ulp || eq_half_ulp) {
             // res = +0.0 * 10^expmin
             res.w[1] = 0x0000000000000000ull;
             res.w[0] = 0x0000000000000000ull;
           } else { // if (gt_half_ulp)
             // res = +1 * 10^expmin
             res.w[1] = 0x0000000000000000ull;
             res.w[0] = 0x0000000000000001ull;
           }
           e4 = expmin;
         } else { // if (x0 > q4)
           // the second rounding is for 0.0...d(0)d(1)...d(q4-1) * 10^emin
           res.w[1] = 0;
           res.w[0] = 0;
           e4 = expmin;
           is_inexact_lt_midpoint = 1;
         }
         // avoid a double rounding error
         if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
             is_midpoint_lt_even) { // double rounding error upward
           // res = res - 1
           res.w[0]--;
           if (res.w[0] == 0xffffffffffffffffull)
             res.w[1]--;
           // Note: a double rounding error upward is not possible; for this
           // the result after the first rounding would have to be 99...95
           // (35 digits in all), possibly followed by a number of zeros; this
           // not possible for f * f + 0
           is_midpoint_lt_even = 0;
           is_inexact_lt_midpoint = 1;
         } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
             is_midpoint_gt_even) { // double rounding error downward
           // res = res + 1
           res.w[0]++;
           if (res.w[0] == 0)
             res.w[1]++;
           is_midpoint_gt_even = 0;
           is_inexact_gt_midpoint = 1;
         } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
         	   !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
           // if this second rounding was exact the result may still be
           // inexact because of the first rounding
           if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
             is_inexact_gt_midpoint = 1;
           }
           if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
             is_inexact_lt_midpoint = 1;
           }
         } else if (is_midpoint_gt_even &&
         	   (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
           // pulled up to a midpoint
           is_inexact_lt_midpoint = 1;
           is_inexact_gt_midpoint = 0;
           is_midpoint_lt_even = 0;
           is_midpoint_gt_even = 0;
         } else if (is_midpoint_lt_even &&
         	   (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
           // pulled down to a midpoint
           is_inexact_lt_midpoint = 0;
           is_inexact_gt_midpoint = 1;
           is_midpoint_lt_even = 0;
           is_midpoint_gt_even = 0;
         } else {
           ;
         }
       } else { // if e4 >= emin then q4 < P and the result is tiny and exact
         if (e3 < e4) {
           // if (e3 < e4) the preferred exponent is e3
           // return (C4 * 10^scale) * 10^(e4 - scale)
           // where scale = min (p34-q4, (e4 - e3))
           scale = p34 - q4;
           ind = e4 - e3;
           if (ind < scale)
             scale = ind;
           if (scale == 0) {
             ; // res and e4 are unchanged
           } else if (q4 <= 19) { // C4 fits in 64 bits
             if (scale <= 19) { // 10^scale fits in 64 bits
               // 64 x 64 res.w[0] * ten2k64[scale]
               __mul_64x64_to_128MACH (res, res.w[0], ten2k64[scale]);
             } else { // 10^scale fits in 128 bits
               // 64 x 128 res.w[0] * ten2k128[scale - 20]
               __mul_128x64_to_128 (res, res.w[0], ten2k128[scale - 20]);
             }
           } else { // res fits in 128 bits, but 10^scale must fit in 64 bits
             // 64 x 128 ten2k64[scale] * C3
             __mul_128x64_to_128 (res, ten2k64[scale], res);
           }
           // subtract scale from the exponent
           e4 = e4 - scale;
         }
       }

       // check for inexact result
       if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
           is_midpoint_lt_even || is_midpoint_gt_even) {
         // set the inexact flag and the underflow flag
         *pfpsf |= INEXACT_EXCEPTION;
         *pfpsf |= UNDERFLOW_EXCEPTION;
       }
       res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1];
       if (rnd_mode != ROUNDING_TO_NEAREST) {
         rounding_correction (rnd_mode,
         		     is_inexact_lt_midpoint,
         		     is_inexact_gt_midpoint,
         		     is_midpoint_lt_even, is_midpoint_gt_even,
         		     e4, &res, pfpsf);
       }
       *pfpsf |= save_fpsf;
       *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
       *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
       *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
       *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
       BID_SWAP128 (res);
       BID_RETURN (res)
     }
     // no overflow, and no underflow for rounding to nearest
     res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1];

     if (rnd_mode != ROUNDING_TO_NEAREST) {
       rounding_correction (rnd_mode,
         		   is_inexact_lt_midpoint,
         		   is_inexact_gt_midpoint,
         		   is_midpoint_lt_even, is_midpoint_gt_even,
         		   e4, &res, pfpsf);
       // if e4 = expmin && significand < 10^33 => result is tiny (for RD, RZ)
       if (e4 == expmin) {
         if ((res.w[1] & MASK_COEFF) < 0x0000314dc6448d93ull ||
             ((res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull &&
              res.w[0] < 0x38c15b0a00000000ull)) {
           is_tiny = 1;
         }
       }
     }

     if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
         is_midpoint_lt_even || is_midpoint_gt_even) {
       // set the inexact flag
       *pfpsf |= INEXACT_EXCEPTION;
       if (is_tiny)
         *pfpsf |= UNDERFLOW_EXCEPTION;
     }

     if ((*pfpsf & INEXACT_EXCEPTION) == 0) { // x * y is exact
       // need to ensure that the result has the preferred exponent
       p_exp = res.w[1] & MASK_EXP;
       if (z_exp < p_exp) { // the preferred exponent is z_exp
         // signficand of res in C3
         C3.w[1] = res.w[1] & MASK_COEFF;
         C3.w[0] = res.w[0];
         // the number of decimal digits of x * y is q4 <= 34
         // Note: the coefficient fits in 128 bits

         // return (C3 * 10^scale) * 10^(p_exp - scale)
         // where scale = min (p34-q4, (p_exp-z_exp) >> 49)
         scale = p34 - q4;
         ind = (p_exp - z_exp) >> 49;
         if (ind < scale)
           scale = ind;
         // subtract scale from the exponent
         p_exp = p_exp - ((UINT64) scale << 49);
         if (scale == 0) {
           ; // leave res unchanged
         } else if (q4 <= 19) { // x * y fits in 64 bits
           if (scale <= 19) { // 10^scale fits in 64 bits
             // 64 x 64 C3.w[0] * ten2k64[scale]
             __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
           } else { // 10^scale fits in 128 bits
             // 64 x 128 C3.w[0] * ten2k128[scale - 20]
             __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
           }
           res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1];
         } else { // x * y fits in 128 bits, but 10^scale must fit in 64 bits
           // 64 x 128 ten2k64[scale] * C3
           __mul_128x64_to_128 (res, ten2k64[scale], C3);
           res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1];
         }
       } // else leave the result as it is, because p_exp <= z_exp
     }
     *pfpsf |= save_fpsf;
     *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
     *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
     *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
     *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
     BID_SWAP128 (res);
     BID_RETURN (res)
   } // else we have f * f + f

   // continue with x = f, y = f, z = f

   delta = q3 + e3 - q4 - e4;
 delta_ge_zero:
   if (delta >= 0) {

     if (p34 <= delta - 1 ||	// Case (1')
         (p34 == delta && e3 + 6176 < p34 - q3)) { // Case (1''A)
       // check for overflow, which can occur only in Case (1')
       if ((q3 + e3) > (p34 + expmax) && p34 <= delta - 1) {
         // e3 > expmax implies p34 <= delta-1 and e3 > expmax is a necessary
         // condition for (q3 + e3) > (p34 + expmax)
         if (rnd_mode == ROUNDING_TO_NEAREST) {
           res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
           res.w[0] = 0x0000000000000000ull;
           *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
         } else {
           if (p_sign == z_sign) {
             is_inexact_lt_midpoint = 1;
           } else {
             is_inexact_gt_midpoint = 1;
           }
           // q3 <= p34; if (q3 < p34) scale C3 up by 10^(p34-q3)
           scale = p34 - q3;
           if (scale == 0) {
             res.w[1] = z_sign | C3.w[1];
             res.w[0] = C3.w[0];
           } else {
             if (q3 <= 19) { // C3 fits in 64 bits
               if (scale <= 19) { // 10^scale fits in 64 bits
         	// 64 x 64 C3.w[0] * ten2k64[scale]
         	__mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
               } else { // 10^scale fits in 128 bits
         	// 64 x 128 C3.w[0] * ten2k128[scale - 20]
         	__mul_128x64_to_128 (res, C3.w[0],
         			     ten2k128[scale - 20]);
               }
             } else { // C3 fits in 128 bits, but 10^scale must fit in 64 bits
               // 64 x 128 ten2k64[scale] * C3
               __mul_128x64_to_128 (res, ten2k64[scale], C3);
             }
             // the coefficient in res has q3 + scale = p34 digits
           }
           e3 = e3 - scale;
           res.w[1] = z_sign | res.w[1];
           rounding_correction (rnd_mode,
         		       is_inexact_lt_midpoint,
         		       is_inexact_gt_midpoint,
         		       is_midpoint_lt_even, is_midpoint_gt_even,
         		       e3, &res, pfpsf);
         }
         *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
         *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
         *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
         *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
         BID_SWAP128 (res);
         BID_RETURN (res)
       }
       // res = z
       if (q3 < p34) { // the preferred exponent is z_exp - (p34 - q3)
         // return (C3 * 10^scale) * 10^(z_exp - scale)
         // where scale = min (p34-q3, z_exp-EMIN)
         scale = p34 - q3;
         ind = e3 + 6176;
         if (ind < scale)
           scale = ind;
         if (scale == 0) {
           res.w[1] = C3.w[1];
           res.w[0] = C3.w[0];
         } else if (q3 <= 19) { // z fits in 64 bits
           if (scale <= 19) { // 10^scale fits in 64 bits
             // 64 x 64 C3.w[0] * ten2k64[scale]
             __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
           } else { // 10^scale fits in 128 bits
             // 64 x 128 C3.w[0] * ten2k128[scale - 20]
             __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
           }
         } else { // z fits in 128 bits, but 10^scale must fit in 64 bits
           // 64 x 128 ten2k64[scale] * C3
           __mul_128x64_to_128 (res, ten2k64[scale], C3);
         }
         // the coefficient in res has q3 + scale digits
         // subtract scale from the exponent
         z_exp = z_exp - ((UINT64) scale << 49);
         e3 = e3 - scale;
         res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
         if (scale + q3 < p34)
           *pfpsf |= UNDERFLOW_EXCEPTION;
       } else {
         scale = 0;
         res.w[1] = z_sign | ((UINT64) (e3 + 6176) << 49) | C3.w[1];
         res.w[0] = C3.w[0];
       }

       // use the following to avoid double rounding errors when operating on
       // mixed formats in rounding to nearest, and for correcting the result
       // if not rounding to nearest
       if ((p_sign != z_sign) && (delta == (q3 + scale + 1))) {
         // there is a gap of exactly one digit between the scaled C3 and C4
         // C3 * 10^ scale = 10^(q3+scale-1) <=> C3 = 10^(q3-1) is special case
         if ((q3 <= 19 && C3.w[0] != ten2k64[q3 - 1]) ||
             (q3 == 20 && (C3.w[1] != 0 || C3.w[0] != ten2k64[19])) ||
             (q3 >= 21 && (C3.w[1] != ten2k128[q3 - 21].w[1] ||
         		  C3.w[0] != ten2k128[q3 - 21].w[0]))) {
           // C3 * 10^ scale != 10^(q3-1)
           // if ((res.w[1] & MASK_COEFF) != 0x0000314dc6448d93ull ||
           // res.w[0] != 0x38c15b0a00000000ull) { // C3 * 10^scale != 10^33
           is_inexact_gt_midpoint = 1; // if (z_sign), set as if for abs. value
         } else { // if C3 * 10^scale = 10^(q3+scale-1)
           // ok from above e3 = (z_exp >> 49) - 6176;
           // the result is always inexact
           if (q4 == 1) {
             R64 = C4.w[0];
           } else {
             // if q4 > 1 then truncate C4 from q4 digits to 1 digit;
             // x = q4-1, 1 <= x <= 67 and check if this operation is exact
             if (q4 <= 18) { // 2 <= q4 <= 18
               round64_2_18 (q4, q4 - 1, C4.w[0], &R64, &incr_exp,
         		    &is_midpoint_lt_even, &is_midpoint_gt_even,
         		    &is_inexact_lt_midpoint,
         		    &is_inexact_gt_midpoint);
             } else if (q4 <= 38) {
               P128.w[1] = C4.w[1];
               P128.w[0] = C4.w[0];
               round128_19_38 (q4, q4 - 1, P128, &R128, &incr_exp,
         		      &is_midpoint_lt_even,
         		      &is_midpoint_gt_even,
         		      &is_inexact_lt_midpoint,
         		      &is_inexact_gt_midpoint);
               R64 = R128.w[0]; // one decimal digit
             } else if (q4 <= 57) {
               P192.w[2] = C4.w[2];
               P192.w[1] = C4.w[1];
               P192.w[0] = C4.w[0];
               round192_39_57 (q4, q4 - 1, P192, &R192, &incr_exp,
         		      &is_midpoint_lt_even,
         		      &is_midpoint_gt_even,
         		      &is_inexact_lt_midpoint,
         		      &is_inexact_gt_midpoint);
               R64 = R192.w[0]; // one decimal digit
             } else { // if (q4 <= 68)
               round256_58_76 (q4, q4 - 1, C4, &R256, &incr_exp,
         		      &is_midpoint_lt_even,
         		      &is_midpoint_gt_even,
         		      &is_inexact_lt_midpoint,
         		      &is_inexact_gt_midpoint);
               R64 = R256.w[0]; // one decimal digit
             }
             if (incr_exp) {
               R64 = 10;
             }
           }
           if (q4 == 1 && C4.w[0] == 5) {
             is_inexact_lt_midpoint = 0;
             is_inexact_gt_midpoint = 0;
             is_midpoint_lt_even = 1;
             is_midpoint_gt_even = 0;
           } else if ((e3 == expmin) ||
         	     R64 < 5 || (R64 == 5 && is_inexact_gt_midpoint)) {
             // result does not change
             is_inexact_lt_midpoint = 0;
             is_inexact_gt_midpoint = 1;
             is_midpoint_lt_even = 0;
             is_midpoint_gt_even = 0;
           } else {
             is_inexact_lt_midpoint = 1;
             is_inexact_gt_midpoint = 0;
             is_midpoint_lt_even = 0;
             is_midpoint_gt_even = 0;
             // result decremented is 10^(q3+scale) - 1
             if ((q3 + scale) <= 19) {
               res.w[1] = 0;
               res.w[0] = ten2k64[q3 + scale];
             } else { // if ((q3 + scale + 1) <= 35)
               res.w[1] = ten2k128[q3 + scale - 20].w[1];
               res.w[0] = ten2k128[q3 + scale - 20].w[0];
             }
             res.w[0] = res.w[0] - 1; // borrow never occurs
             z_exp = z_exp - EXP_P1;
             e3 = e3 - 1;
             res.w[1] = z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1];
           }
           if (e3 == expmin) {
             if (R64 < 5 || (R64 == 5 && !is_inexact_lt_midpoint)) {
               ; // result not tiny (in round-to-nearest mode)
             } else {
               *pfpsf |= UNDERFLOW_EXCEPTION;
             }
           }
         } // end 10^(q3+scale-1)
         // set the inexact flag
         *pfpsf |= INEXACT_EXCEPTION;
       } else {
         if (p_sign == z_sign) {
           // if (z_sign), set as if for absolute value
           is_inexact_lt_midpoint = 1;
         } else { // if (p_sign != z_sign)
           // if (z_sign), set as if for absolute value
           is_inexact_gt_midpoint = 1;
         }
         *pfpsf |= INEXACT_EXCEPTION;
       }
       // the result is always inexact => set the inexact flag
       // Determine tininess:
       //    if (exp > expmin)
       //      the result is not tiny
       //    else // if exp = emin
       //      if (q3 + scale < p34)
       //        the result is tiny
       //      else // if (q3 + scale = p34)
       //        if (C3 * 10^scale > 10^33)
       //          the result is not tiny
       //        else // if C3 * 10^scale = 10^33
       //          if (xy * z > 0)
       //            the result is not tiny
       //          else // if (xy * z < 0)
       //            if (z > 0)
       //              if rnd_mode != RP
       //                the result is tiny
       //              else // if RP
       //                the result is not tiny
       //            else // if (z < 0)
       //              if rnd_mode != RM
       //                the result is tiny
       //              else // if RM
       //                the result is not tiny
       //              endif
       //            endif
       //          endif
       //        endif
       //      endif
       //    endif
       if ((e3 == expmin && (q3 + scale) < p34) ||
           (e3 == expmin && (q3 + scale) == p34 &&
           (res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull &&	// 10^33_high
           res.w[0] == 0x38c15b0a00000000ull &&	// 10^33_low
           z_sign != p_sign && ((!z_sign && rnd_mode != ROUNDING_UP) ||
           (z_sign && rnd_mode != ROUNDING_DOWN)))) {
         *pfpsf |= UNDERFLOW_EXCEPTION;
       }
       if (rnd_mode != ROUNDING_TO_NEAREST) {
         rounding_correction (rnd_mode,
         		     is_inexact_lt_midpoint,
         		     is_inexact_gt_midpoint,
         		     is_midpoint_lt_even, is_midpoint_gt_even,
         		     e3, &res, pfpsf);
       }
       *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
       *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
       *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
       *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
       BID_SWAP128 (res);
       BID_RETURN (res)

     } else if (p34 == delta) { // Case (1''B)

       // because Case (1''A) was treated above, e3 + 6176 >= p34 - q3
       // and C3 can be scaled up to p34 digits if needed

       // scale C3 to p34 digits if needed
       scale = p34 - q3; // 0 <= scale <= p34 - 1
       if (scale == 0) {
         res.w[1] = C3.w[1];
         res.w[0] = C3.w[0];
       } else if (q3 <= 19) { // z fits in 64 bits
         if (scale <= 19) { // 10^scale fits in 64 bits
           // 64 x 64 C3.w[0] * ten2k64[scale]
           __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
         } else { // 10^scale fits in 128 bits
           // 64 x 128 C3.w[0] * ten2k128[scale - 20]
           __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
         }
       } else { // z fits in 128 bits, but 10^scale must fit in 64 bits
         // 64 x 128 ten2k64[scale] * C3
         __mul_128x64_to_128 (res, ten2k64[scale], C3);
       }
       // subtract scale from the exponent
       z_exp = z_exp - ((UINT64) scale << 49);
       e3 = e3 - scale;
       // now z_sign, z_exp, and res correspond to a z scaled to p34 = 34 digits

       // determine whether x * y is less than, equal to, or greater than
       // 1/2 ulp (z)
       if (q4 <= 19) {
         if (C4.w[0] < midpoint64[q4 - 1]) { // < 1/2 ulp
           lt_half_ulp = 1;
         } else if (C4.w[0] == midpoint64[q4 - 1]) { // = 1/2 ulp
           eq_half_ulp = 1;
         } else { // > 1/2 ulp
           gt_half_ulp = 1;
         }
       } else if (q4 <= 38) {
         if (C4.w[2] == 0 && (C4.w[1] < midpoint128[q4 - 20].w[1] ||
             (C4.w[1] == midpoint128[q4 - 20].w[1] &&
             C4.w[0] < midpoint128[q4 - 20].w[0]))) { // < 1/2 ulp
           lt_half_ulp = 1;
         } else if (C4.w[2] == 0 && C4.w[1] == midpoint128[q4 - 20].w[1] &&
             C4.w[0] == midpoint128[q4 - 20].w[0]) { // = 1/2 ulp
           eq_half_ulp = 1;
         } else { // > 1/2 ulp
           gt_half_ulp = 1;
         }
       } else if (q4 <= 58) {
         if (C4.w[3] == 0 && (C4.w[2] < midpoint192[q4 - 39].w[2] ||
             (C4.w[2] == midpoint192[q4 - 39].w[2] &&
             C4.w[1] < midpoint192[q4 - 39].w[1]) ||
             (C4.w[2] == midpoint192[q4 - 39].w[2] &&
             C4.w[1] == midpoint192[q4 - 39].w[1] &&
             C4.w[0] < midpoint192[q4 - 39].w[0]))) { // < 1/2 ulp
           lt_half_ulp = 1;
         } else if (C4.w[3] == 0 && C4.w[2] == midpoint192[q4 - 39].w[2] &&
             C4.w[1] == midpoint192[q4 - 39].w[1] &&
             C4.w[0] == midpoint192[q4 - 39].w[0]) { // = 1/2 ulp
           eq_half_ulp = 1;
         } else { // > 1/2 ulp
           gt_half_ulp = 1;
         }
       } else {
         if (C4.w[3] < midpoint256[q4 - 59].w[3] ||
             (C4.w[3] == midpoint256[q4 - 59].w[3] &&
             C4.w[2] < midpoint256[q4 - 59].w[2]) ||
             (C4.w[3] == midpoint256[q4 - 59].w[3] &&
             C4.w[2] == midpoint256[q4 - 59].w[2] &&
             C4.w[1] < midpoint256[q4 - 59].w[1]) ||
             (C4.w[3] == midpoint256[q4 - 59].w[3] &&
             C4.w[2] == midpoint256[q4 - 59].w[2] &&
             C4.w[1] == midpoint256[q4 - 59].w[1] &&
             C4.w[0] < midpoint256[q4 - 59].w[0])) { // < 1/2 ulp
           lt_half_ulp = 1;
         } else if (C4.w[3] == midpoint256[q4 - 59].w[3] &&
             C4.w[2] == midpoint256[q4 - 59].w[2] &&
             C4.w[1] == midpoint256[q4 - 59].w[1] &&
             C4.w[0] == midpoint256[q4 - 59].w[0]) { // = 1/2 ulp
           eq_half_ulp = 1;
         } else { // > 1/2 ulp
           gt_half_ulp = 1;
         }
       }

       if (p_sign == z_sign) {
         if (lt_half_ulp) {
           res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
           // use the following to avoid double rounding errors when operating on
           // mixed formats in rounding to nearest
           is_inexact_lt_midpoint = 1; // if (z_sign), as if for absolute value
         } else if ((eq_half_ulp && (res.w[0] & 0x01)) || gt_half_ulp) {
           // add 1 ulp to the significand
           res.w[0]++;
           if (res.w[0] == 0x0ull)
             res.w[1]++;
           // check for rounding overflow, when coeff == 10^34
           if ((res.w[1] & MASK_COEFF) == 0x0001ed09bead87c0ull &&
               res.w[0] == 0x378d8e6400000000ull) { // coefficient = 10^34
             e3 = e3 + 1;
             // coeff = 10^33
             z_exp = ((UINT64) (e3 + 6176) << 49) & MASK_EXP;
             res.w[1] = 0x0000314dc6448d93ull;
             res.w[0] = 0x38c15b0a00000000ull;
           }
           // end add 1 ulp
           res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
           if (eq_half_ulp) {
             is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value
           } else {
             is_inexact_gt_midpoint = 1; // if (z_sign), as if for absolute value
           }
         } else { // if (eq_half_ulp && !(res.w[0] & 0x01))
           // leave unchanged
           res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
           is_midpoint_gt_even = 1; // if (z_sign), as if for absolute value
         }
         // the result is always inexact, and never tiny
         // set the inexact flag
         *pfpsf |= INEXACT_EXCEPTION;
         // check for overflow
         if (e3 > expmax && rnd_mode == ROUNDING_TO_NEAREST) {
           res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
           res.w[0] = 0x0000000000000000ull;
           *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
           *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
           *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
           *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
           *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
           BID_SWAP128 (res);
           BID_RETURN (res)
         }
         if (rnd_mode != ROUNDING_TO_NEAREST) {
           rounding_correction (rnd_mode,
         		       is_inexact_lt_midpoint,
         		       is_inexact_gt_midpoint,
         		       is_midpoint_lt_even, is_midpoint_gt_even,
         		       e3, &res, pfpsf);
           z_exp = res.w[1] & MASK_EXP;
         }
       } else { // if (p_sign != z_sign)
         // consider two cases, because C3 * 10^scale = 10^33 is a special case
         if (res.w[1] != 0x0000314dc6448d93ull ||
             res.w[0] != 0x38c15b0a00000000ull) { // C3 * 10^scale != 10^33
           if (lt_half_ulp) {
             res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
             // use the following to avoid double rounding errors when operating
             // on mixed formats in rounding to nearest
             is_inexact_gt_midpoint = 1; // if (z_sign), as if for absolute value
           } else if ((eq_half_ulp && (res.w[0] & 0x01)) || gt_half_ulp) {
             // subtract 1 ulp from the significand
             res.w[0]--;
             if (res.w[0] == 0xffffffffffffffffull)
               res.w[1]--;
             res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
             if (eq_half_ulp) {
               is_midpoint_gt_even = 1; // if (z_sign), as if for absolute value
             } else {
               is_inexact_lt_midpoint = 1; //if(z_sign), as if for absolute value
             }
           } else { // if (eq_half_ulp && !(res.w[0] & 0x01))
             // leave unchanged
             res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
             is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value
           }
           // the result is always inexact, and never tiny
           // check for overflow for RN
           if (e3 > expmax) {
             if (rnd_mode == ROUNDING_TO_NEAREST) {
               res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
               res.w[0] = 0x0000000000000000ull;
               *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
             } else {
               rounding_correction (rnd_mode,
         			   is_inexact_lt_midpoint,
         			   is_inexact_gt_midpoint,
         			   is_midpoint_lt_even,
         			   is_midpoint_gt_even, e3, &res,
         			   pfpsf);
             }
             *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
             *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
             *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
             *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
             BID_SWAP128 (res);
             BID_RETURN (res)
           }
           // set the inexact flag
           *pfpsf |= INEXACT_EXCEPTION;
           if (rnd_mode != ROUNDING_TO_NEAREST) {
             rounding_correction (rnd_mode,
         			 is_inexact_lt_midpoint,
         			 is_inexact_gt_midpoint,
         			 is_midpoint_lt_even,
         			 is_midpoint_gt_even, e3, &res, pfpsf);
           }
           z_exp = res.w[1] & MASK_EXP;
         } else { // if C3 * 10^scale = 10^33
           e3 = (z_exp >> 49) - 6176;
           if (e3 > expmin) {
             // the result is exact if exp > expmin and C4 = d*10^(q4-1),
             // where d = 1, 2, 3, ..., 9; it could be tiny too, but exact
             if (q4 == 1) {
               // if q4 = 1 the result is exact
               // result coefficient = 10^34 - C4
               res.w[1] = 0x0001ed09bead87c0ull;
               res.w[0] = 0x378d8e6400000000ull - C4.w[0];
               z_exp = z_exp - EXP_P1;
               e3 = e3 - 1;
               res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
             } else {
               // if q4 > 1 then truncate C4 from q4 digits to 1 digit;
               // x = q4-1, 1 <= x <= 67 and check if this operation is exact
               if (q4 <= 18) { // 2 <= q4 <= 18
         	round64_2_18 (q4, q4 - 1, C4.w[0], &R64, &incr_exp,
         		      &is_midpoint_lt_even,
         		      &is_midpoint_gt_even,
         		      &is_inexact_lt_midpoint,
         		      &is_inexact_gt_midpoint);
               } else if (q4 <= 38) {
         	P128.w[1] = C4.w[1];
         	P128.w[0] = C4.w[0];
         	round128_19_38 (q4, q4 - 1, P128, &R128, &incr_exp,
         			&is_midpoint_lt_even,
         			&is_midpoint_gt_even,
         			&is_inexact_lt_midpoint,
         			&is_inexact_gt_midpoint);
         	R64 = R128.w[0]; // one decimal digit
               } else if (q4 <= 57) {
         	P192.w[2] = C4.w[2];
         	P192.w[1] = C4.w[1];
         	P192.w[0] = C4.w[0];
         	round192_39_57 (q4, q4 - 1, P192, &R192, &incr_exp,
         			&is_midpoint_lt_even,
         			&is_midpoint_gt_even,
         			&is_inexact_lt_midpoint,
         			&is_inexact_gt_midpoint);
         	R64 = R192.w[0]; // one decimal digit
               } else { // if (q4 <= 68)
         	round256_58_76 (q4, q4 - 1, C4, &R256, &incr_exp,
         			&is_midpoint_lt_even,
         			&is_midpoint_gt_even,
         			&is_inexact_lt_midpoint,
         			&is_inexact_gt_midpoint);
         	R64 = R256.w[0]; // one decimal digit
               }
               if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
         	  !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
         	// the result is exact: 10^34 - R64
         	// incr_exp = 0 with certainty
         	z_exp = z_exp - EXP_P1;
         	e3 = e3 - 1;
         	res.w[1] =
         	  z_sign | (z_exp & MASK_EXP) | 0x0001ed09bead87c0ull;
         	res.w[0] = 0x378d8e6400000000ull - R64;
               } else {
         	// We want R64 to be the top digit of C4, but we actually
         	// obtained (C4 * 10^(-q4+1))RN; a correction may be needed,
         	// because the top digit is (C4 * 10^(-q4+1))RZ
         	// however, if incr_exp = 1 then R64 = 10 with certainty
         	if (incr_exp) {
         	  R64 = 10;
         	}
         	// the result is inexact as C4 has more than 1 significant digit
         	// and C3 * 10^scale = 10^33
         	// example of case that is treated here:
         	// 100...0 * 10^e3 - 0.41 * 10^e3 =
         	// 0999...9.59 * 10^e3 -> rounds to 99...96*10^(e3-1)
         	// note that (e3 > expmin}
         	// in order to round, subtract R64 from 10^34 and then compare
         	// C4 - R64 * 10^(q4-1) with 1/2 ulp
         	// calculate 10^34 - R64
         	res.w[1] = 0x0001ed09bead87c0ull;
         	res.w[0] = 0x378d8e6400000000ull - R64;
         	z_exp = z_exp - EXP_P1; // will be OR-ed with sign & significand
         	// calculate C4 - R64 * 10^(q4-1); this is a rare case and
         	// R64 is small, 1 <= R64 <= 9
         	e3 = e3 - 1;
         	if (is_inexact_lt_midpoint) {
         	  is_inexact_lt_midpoint = 0;
         	  is_inexact_gt_midpoint = 1;
         	} else if (is_inexact_gt_midpoint) {
         	  is_inexact_gt_midpoint = 0;
         	  is_inexact_lt_midpoint = 1;
         	} else if (is_midpoint_lt_even) {
         	  is_midpoint_lt_even = 0;
         	  is_midpoint_gt_even = 1;
         	} else if (is_midpoint_gt_even) {
         	  is_midpoint_gt_even = 0;
         	  is_midpoint_lt_even = 1;
         	} else {
         	  ;
         	}
         	// the result is always inexact, and never tiny
         	// check for overflow for RN
         	if (e3 > expmax) {
         	  if (rnd_mode == ROUNDING_TO_NEAREST) {
         	    res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
         	    res.w[0] = 0x0000000000000000ull;
         	    *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
         	  } else {
         	    rounding_correction (rnd_mode,
         				 is_inexact_lt_midpoint,
         				 is_inexact_gt_midpoint,
         				 is_midpoint_lt_even,
         				 is_midpoint_gt_even, e3, &res,
         				 pfpsf);
         	  }
         	  *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
         	  *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
         	  *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
         	  *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
         	  BID_SWAP128 (res);
         	  BID_RETURN (res)
         	}
         	// set the inexact flag
         	*pfpsf |= INEXACT_EXCEPTION;
         	res.w[1] =
         	  z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1];
         	if (rnd_mode != ROUNDING_TO_NEAREST) {
         	  rounding_correction (rnd_mode,
         			       is_inexact_lt_midpoint,
         			       is_inexact_gt_midpoint,
         			       is_midpoint_lt_even,
         			       is_midpoint_gt_even, e3, &res,
         			       pfpsf);
         	}
         	z_exp = res.w[1] & MASK_EXP;
               } // end result is inexact
             } // end q4 > 1
           } else { // if (e3 = emin)
             // if e3 = expmin the result is also tiny (the condition for
             // tininess is C4 > 050...0 [q4 digits] which is met because
             // the msd of C4 is not zero)
             // the result is tiny and inexact in all rounding modes;
             // it is either 100...0 or 0999...9 (use lt_half_ulp, eq_half_ulp,
             // gt_half_ulp to calculate)
             // if (lt_half_ulp || eq_half_ulp) res = 10^33 stays unchanged

             // p_sign != z_sign so swap gt_half_ulp and lt_half_ulp
             if (gt_half_ulp) { // res = 10^33 - 1
               res.w[1] = 0x0000314dc6448d93ull;
               res.w[0] = 0x38c15b09ffffffffull;
             } else {
               res.w[1] = 0x0000314dc6448d93ull;
               res.w[0] = 0x38c15b0a00000000ull;
             }
             res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
             *pfpsf |= UNDERFLOW_EXCEPTION; // inexact is set later

             if (eq_half_ulp) {
               is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value
             } else if (lt_half_ulp) {
               is_inexact_gt_midpoint = 1; //if(z_sign), as if for absolute value
             } else { // if (gt_half_ulp)
               is_inexact_lt_midpoint = 1; //if(z_sign), as if for absolute value
             }

             if (rnd_mode != ROUNDING_TO_NEAREST) {
               rounding_correction (rnd_mode,
                   is_inexact_lt_midpoint,
                   is_inexact_gt_midpoint,
                   is_midpoint_lt_even,
                   is_midpoint_gt_even, e3, &res,
                   pfpsf);
               z_exp = res.w[1] & MASK_EXP;
             }
           } // end e3 = emin
           // set the inexact flag (if the result was not exact)
           if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
               is_midpoint_lt_even || is_midpoint_gt_even)
             *pfpsf |= INEXACT_EXCEPTION;
         } // end 10^33
       } // end if (p_sign != z_sign)
       res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
       *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
       *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
       *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
       *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
       BID_SWAP128 (res);
       BID_RETURN (res)

     } else if (((q3 <= delta && delta < p34 && p34 < delta + q4) || // Case (2)
         (q3 <= delta && delta + q4 <= p34) || // Case (3)
         (delta < q3 && p34 < delta + q4) || // Case (4)
         (delta < q3 && q3 <= delta + q4 && delta + q4 <= p34) || // Case (5)
         (delta + q4 < q3)) && // Case (6)
         !(delta <= 1 && p_sign != z_sign)) { // Case (2), (3), (4), (5) or (6)

       // the result has the sign of z

       if ((q3 <= delta && delta < p34 && p34 < delta + q4) || // Case (2)
           (delta < q3 && p34 < delta + q4)) { // Case (4)
         // round first the sum x * y + z with unbounded exponent
         // scale C3 up by scale = p34 - q3, 1 <= scale <= p34-1,
         // 1 <= scale <= 33
         // calculate res = C3 * 10^scale
         scale = p34 - q3;
         x0 = delta + q4 - p34;
       } else if (delta + q4 < q3) { // Case (6)
         // make Case (6) look like Case (3) or Case (5) with scale = 0
         // by scaling up C4 by 10^(q3 - delta - q4)
         scale = q3 - delta - q4; // 1 <= scale <= 33
         if (q4 <= 19) { // 1 <= scale <= 19; C4 fits in 64 bits
           if (scale <= 19) { // 10^scale fits in 64 bits
             // 64 x 64 C4.w[0] * ten2k64[scale]
             __mul_64x64_to_128MACH (P128, C4.w[0], ten2k64[scale]);
           } else { // 10^scale fits in 128 bits
             // 64 x 128 C4.w[0] * ten2k128[scale - 20]
             __mul_128x64_to_128 (P128, C4.w[0], ten2k128[scale - 20]);
           }
         } else { // C4 fits in 128 bits, but 10^scale must fit in 64 bits
           // 64 x 128 ten2k64[scale] * C4
           __mul_128x64_to_128 (P128, ten2k64[scale], C4);
         }
         C4.w[0] = P128.w[0];
         C4.w[1] = P128.w[1];
         // e4 does not need adjustment, as it is not used from this point on
         scale = 0;
         x0 = 0;
         // now Case (6) looks like Case (3) or Case (5) with scale = 0
       } else { // if Case (3) or Case (5)
         // Note: Case (3) is similar to Case (2), but scale differs and the
         // result is exact, unless it is tiny (so x0 = 0 when calculating the
         // result with unbounded exponent)

         // calculate first the sum x * y + z with unbounded exponent (exact)
         // scale C3 up by scale = delta + q4 - q3, 1 <= scale <= p34-1,
         // 1 <= scale <= 33
         // calculate res = C3 * 10^scale
         scale = delta + q4 - q3;
         x0 = 0;
         // Note: the comments which follow refer [mainly] to Case (2)]
       }

     case2_repeat:
       if (scale == 0) { // this could happen e.g. if we return to case2_repeat
         // or in Case (4)
         res.w[1] = C3.w[1];
         res.w[0] = C3.w[0];
       } else if (q3 <= 19) { // 1 <= scale <= 19; z fits in 64 bits
         if (scale <= 19) { // 10^scale fits in 64 bits
           // 64 x 64 C3.w[0] * ten2k64[scale]
           __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
         } else { // 10^scale fits in 128 bits
           // 64 x 128 C3.w[0] * ten2k128[scale - 20]
           __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
         }
       } else { // z fits in 128 bits, but 10^scale must fit in 64 bits
         // 64 x 128 ten2k64[scale] * C3
         __mul_128x64_to_128 (res, ten2k64[scale], C3);
       }
       // e3 is already calculated
       e3 = e3 - scale;
       // now res = C3 * 10^scale and e3 = e3 - scale
       // Note: C3 * 10^scale could be 10^34 if we returned to case2_repeat
       // because the result was too small

       // round C4 to nearest to q4 - x0 digits, where x0 = delta + q4 - p34,
       // 1 <= x0 <= min (q4 - 1, 2 * p34 - 1) <=> 1 <= x0 <= min (q4 - 1, 67)
       // Also: 1 <= q4 - x0 <= p34 -1 => 1 <= q4 - x0 <= 33 (so the result of
       // the rounding fits in 128 bits!)
       // x0 = delta + q4 - p34 (calculated before reaching case2_repeat)
       // because q3 + q4 - x0 <= P => x0 >= q3 + q4 - p34
       if (x0 == 0) { // this could happen only if we return to case2_repeat, or
         // for Case (3) or Case (6)
         R128.w[1] = C4.w[1];
         R128.w[0] = C4.w[0];
       } else if (q4 <= 18) {
         // 2 <= q4 <= 18, max(1, q3+q4-p34) <= x0 <= q4 - 1, 1 <= x0 <= 17
         round64_2_18 (q4, x0, C4.w[0], &R64, &incr_exp,
             &is_midpoint_lt_even, &is_midpoint_gt_even,
             &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
         if (incr_exp) {
           // R64 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 17
           R64 = ten2k64[q4 - x0];
         }
         R128.w[1] = 0;
         R128.w[0] = R64;
       } else if (q4 <= 38) {
         // 19 <= q4 <= 38, max(1, q3+q4-p34) <= x0 <= q4 - 1, 1 <= x0 <= 37
         P128.w[1] = C4.w[1];
         P128.w[0] = C4.w[0];
         round128_19_38 (q4, x0, P128, &R128, &incr_exp,
             &is_midpoint_lt_even, &is_midpoint_gt_even,
             &is_inexact_lt_midpoint,
             &is_inexact_gt_midpoint);
         if (incr_exp) {
           // R128 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 37
           if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19
             R128.w[0] = ten2k64[q4 - x0];
             // R128.w[1] stays 0
           } else { // 20 <= q4 - x0 <= 37
             R128.w[0] = ten2k128[q4 - x0 - 20].w[0];
             R128.w[1] = ten2k128[q4 - x0 - 20].w[1];
           }
         }
       } else if (q4 <= 57) {
         // 38 <= q4 <= 57, max(1, q3+q4-p34) <= x0 <= q4 - 1, 5 <= x0 <= 56
         P192.w[2] = C4.w[2];
         P192.w[1] = C4.w[1];
         P192.w[0] = C4.w[0];
         round192_39_57 (q4, x0, P192, &R192, &incr_exp,
             &is_midpoint_lt_even, &is_midpoint_gt_even,
             &is_inexact_lt_midpoint,
             &is_inexact_gt_midpoint);
         // R192.w[2] is always 0
         if (incr_exp) {
           // R192 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 5, 1 <= q4 - x0 <= 52
           if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19
             R192.w[0] = ten2k64[q4 - x0];
             // R192.w[1] stays 0
             // R192.w[2] stays 0
           } else { // 20 <= q4 - x0 <= 33
             R192.w[0] = ten2k128[q4 - x0 - 20].w[0];
             R192.w[1] = ten2k128[q4 - x0 - 20].w[1];
             // R192.w[2] stays 0
           }
         }
         R128.w[1] = R192.w[1];
         R128.w[0] = R192.w[0];
       } else {
         // 58 <= q4 <= 68, max(1, q3+q4-p34) <= x0 <= q4 - 1, 25 <= x0 <= 67
         round256_58_76 (q4, x0, C4, &R256, &incr_exp,
             &is_midpoint_lt_even, &is_midpoint_gt_even,
             &is_inexact_lt_midpoint,
             &is_inexact_gt_midpoint);
         // R256.w[3] and R256.w[2] are always 0
         if (incr_exp) {
           // R256 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 25, 1 <= q4 - x0 <= 43
           if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19
             R256.w[0] = ten2k64[q4 - x0];
             // R256.w[1] stays 0
             // R256.w[2] stays 0
             // R256.w[3] stays 0
           } else { // 20 <= q4 - x0 <= 33
             R256.w[0] = ten2k128[q4 - x0 - 20].w[0];
             R256.w[1] = ten2k128[q4 - x0 - 20].w[1];
             // R256.w[2] stays 0
             // R256.w[3] stays 0
           }
         }
         R128.w[1] = R256.w[1];
         R128.w[0] = R256.w[0];
       }
       // now add C3 * 10^scale in res and the signed top (q4-x0) digits of C4,
       // rounded to nearest, which were copied into R128
       if (z_sign == p_sign) {
         lsb = res.w[0] & 0x01; // lsb of C3 * 10^scale
         // the sum can result in [up to] p34 or p34 + 1 digits
         res.w[0] = res.w[0] + R128.w[0];
         res.w[1] = res.w[1] + R128.w[1];
         if (res.w[0] < R128.w[0])
           res.w[1]++; // carry
         // if res > 10^34 - 1 need to increase x0 and decrease scale by 1
         if (res.w[1] > 0x0001ed09bead87c0ull ||
             (res.w[1] == 0x0001ed09bead87c0ull &&
              res.w[0] > 0x378d8e63ffffffffull)) {
           // avoid double rounding error
           is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
           is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
           is_midpoint_lt_even0 = is_midpoint_lt_even;
           is_midpoint_gt_even0 = is_midpoint_gt_even;
           is_inexact_lt_midpoint = 0;
           is_inexact_gt_midpoint = 0;
           is_midpoint_lt_even = 0;
           is_midpoint_gt_even = 0;
           P128.w[1] = res.w[1];
           P128.w[0] = res.w[0];
           round128_19_38 (35, 1, P128, &res, &incr_exp,
               &is_midpoint_lt_even, &is_midpoint_gt_even,
               &is_inexact_lt_midpoint,
               &is_inexact_gt_midpoint);
           // incr_exp is 0 with certainty in this case
           // avoid a double rounding error
           if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
               is_midpoint_lt_even) { // double rounding error upward
             // res = res - 1
             res.w[0]--;
             if (res.w[0] == 0xffffffffffffffffull)
               res.w[1]--;
             // Note: a double rounding error upward is not possible; for this
             // the result after the first rounding would have to be 99...95
             // (35 digits in all), possibly followed by a number of zeros; this
             // not possible in Cases (2)-(6) or (15)-(17) which may get here
             is_midpoint_lt_even = 0;
             is_inexact_lt_midpoint = 1;
           } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
               is_midpoint_gt_even) { // double rounding error downward
             // res = res + 1
             res.w[0]++;
             if (res.w[0] == 0)
               res.w[1]++;
             is_midpoint_gt_even = 0;
             is_inexact_gt_midpoint = 1;
           } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
         	     !is_inexact_lt_midpoint
         	     && !is_inexact_gt_midpoint) {
             // if this second rounding was exact the result may still be
             // inexact because of the first rounding
             if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
               is_inexact_gt_midpoint = 1;
             }
             if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
               is_inexact_lt_midpoint = 1;
             }
           } else if (is_midpoint_gt_even &&
         	     (is_inexact_gt_midpoint0
         	      || is_midpoint_lt_even0)) {
             // pulled up to a midpoint
             is_inexact_lt_midpoint = 1;
             is_inexact_gt_midpoint = 0;
             is_midpoint_lt_even = 0;
             is_midpoint_gt_even = 0;
           } else if (is_midpoint_lt_even &&
         	     (is_inexact_lt_midpoint0
         	      || is_midpoint_gt_even0)) {
             // pulled down to a midpoint
             is_inexact_lt_midpoint = 0;
             is_inexact_gt_midpoint = 1;
             is_midpoint_lt_even = 0;
             is_midpoint_gt_even = 0;
           } else {
             ;
           }
           // adjust exponent
           e3 = e3 + 1;
           if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
               !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
             if (is_midpoint_lt_even0 || is_midpoint_gt_even0 ||
         	is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0) {
               is_inexact_lt_midpoint = 1;
             }
           }
         } else {
           // this is the result rounded with unbounded exponent, unless a
           // correction is needed
           res.w[1] = res.w[1] & MASK_COEFF;
           if (lsb == 1) {
             if (is_midpoint_gt_even) {
               // res = res + 1
               is_midpoint_gt_even = 0;
               is_midpoint_lt_even = 1;
               res.w[0]++;
               if (res.w[0] == 0x0)
         	res.w[1]++;
               // check for rounding overflow
               if (res.w[1] == 0x0001ed09bead87c0ull &&
         	  res.w[0] == 0x378d8e6400000000ull) {
         	// res = 10^34 => rounding overflow
         	res.w[1] = 0x0000314dc6448d93ull;
         	res.w[0] = 0x38c15b0a00000000ull; // 10^33
         	e3++;
               }
             } else if (is_midpoint_lt_even) {
               // res = res - 1
               is_midpoint_lt_even = 0;
               is_midpoint_gt_even = 1;
               res.w[0]--;
               if (res.w[0] == 0xffffffffffffffffull)
         	res.w[1]--;
               // if the result is pure zero, the sign depends on the rounding
               // mode (x*y and z had opposite signs)
               if (res.w[1] == 0x0ull && res.w[0] == 0x0ull) {
         	if (rnd_mode != ROUNDING_DOWN)
         	  z_sign = 0x0000000000000000ull;
         	else
         	  z_sign = 0x8000000000000000ull;
         	// the exponent is max (e3, expmin)
         	res.w[1] = 0x0;
         	res.w[0] = 0x0;
         	*ptr_is_midpoint_lt_even = is_midpoint_lt_even;
         	*ptr_is_midpoint_gt_even = is_midpoint_gt_even;
         	*ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
         	*ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
         	BID_SWAP128 (res);
         	BID_RETURN (res)
               }
             } else {
               ;
             }
           }
         }
       } else { // if (z_sign != p_sign)
         lsb = res.w[0] & 0x01; // lsb of C3 * 10^scale; R128 contains rounded C4
         // used to swap rounding indicators if p_sign != z_sign
         // the sum can result in [up to] p34 or p34 - 1 digits
         tmp64 = res.w[0];
         res.w[0] = res.w[0] - R128.w[0];
         res.w[1] = res.w[1] - R128.w[1];
         if (res.w[0] > tmp64)
           res.w[1]--; // borrow
         // if res < 10^33 and exp > expmin need to decrease x0 and
         // increase scale by 1
         if (e3 > expmin && ((res.w[1] < 0x0000314dc6448d93ull ||
         		     (res.w[1] == 0x0000314dc6448d93ull &&
         		      res.w[0] < 0x38c15b0a00000000ull)) ||
         		    (is_inexact_lt_midpoint
         		     && res.w[1] == 0x0000314dc6448d93ull
         		     && res.w[0] == 0x38c15b0a00000000ull))
             && x0 >= 1) {
           x0 = x0 - 1;
           // first restore e3, otherwise it will be too small
           e3 = e3 + scale;
           scale = scale + 1;
           is_inexact_lt_midpoint = 0;
           is_inexact_gt_midpoint = 0;
           is_midpoint_lt_even = 0;
           is_midpoint_gt_even = 0;
           incr_exp = 0;
           goto case2_repeat;
         }
         // else this is the result rounded with unbounded exponent;
         // because the result has opposite sign to that of C4 which was
         // rounded, need to change the rounding indicators
         if (is_inexact_lt_midpoint) {
           is_inexact_lt_midpoint = 0;
           is_inexact_gt_midpoint = 1;
         } else if (is_inexact_gt_midpoint) {
           is_inexact_gt_midpoint = 0;
           is_inexact_lt_midpoint = 1;
         } else if (lsb == 0) {
           if (is_midpoint_lt_even) {
             is_midpoint_lt_even = 0;
             is_midpoint_gt_even = 1;
           } else if (is_midpoint_gt_even) {
             is_midpoint_gt_even = 0;
             is_midpoint_lt_even = 1;
           } else {
             ;
           }
         } else if (lsb == 1) {
           if (is_midpoint_lt_even) {
             // res = res + 1
             res.w[0]++;
             if (res.w[0] == 0x0)
               res.w[1]++;
             // check for rounding overflow
             if (res.w[1] == 0x0001ed09bead87c0ull &&
         	res.w[0] == 0x378d8e6400000000ull) {
               // res = 10^34 => rounding overflow
               res.w[1] = 0x0000314dc6448d93ull;
               res.w[0] = 0x38c15b0a00000000ull; // 10^33
               e3++;
             }
           } else if (is_midpoint_gt_even) {
             // res = res - 1
             res.w[0]--;
             if (res.w[0] == 0xffffffffffffffffull)
               res.w[1]--;
             // if the result is pure zero, the sign depends on the rounding
             // mode (x*y and z had opposite signs)
             if (res.w[1] == 0x0ull && res.w[0] == 0x0ull) {
               if (rnd_mode != ROUNDING_DOWN)
         	z_sign = 0x0000000000000000ull;
               else
         	z_sign = 0x8000000000000000ull;
               // the exponent is max (e3, expmin)
               res.w[1] = 0x0;
               res.w[0] = 0x0;
               *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
               *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
               *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
               *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
               BID_SWAP128 (res);
               BID_RETURN (res)
             }
           } else {
             ;
           }
         } else {
           ;
         }
       }
       // check for underflow
       if (e3 == expmin) { // and if significand < 10^33 => result is tiny
         if ((res.w[1] & MASK_COEFF) < 0x0000314dc6448d93ull ||
             ((res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull &&
              res.w[0] < 0x38c15b0a00000000ull)) {
           is_tiny = 1;
         }
       } else if (e3 < expmin) {
         // the result is tiny, so we must truncate more of res
         is_tiny = 1;
         x0 = expmin - e3;
         is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
         is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
         is_midpoint_lt_even0 = is_midpoint_lt_even;
         is_midpoint_gt_even0 = is_midpoint_gt_even;
         is_inexact_lt_midpoint = 0;
         is_inexact_gt_midpoint = 0;
         is_midpoint_lt_even = 0;
         is_midpoint_gt_even = 0;
         // determine the number of decimal digits in res
         if (res.w[1] == 0x0) {
           // between 1 and 19 digits
           for (ind = 1; ind <= 19; ind++) {
             if (res.w[0] < ten2k64[ind]) {
               break;
             }
           }
           // ind digits
         } else if (res.w[1] < ten2k128[0].w[1] ||
         	   (res.w[1] == ten2k128[0].w[1]
         	    && res.w[0] < ten2k128[0].w[0])) {
           // 20 digits
           ind = 20;
         } else { // between 21 and 38 digits
           for (ind = 1; ind <= 18; ind++) {
             if (res.w[1] < ten2k128[ind].w[1] ||
         	(res.w[1] == ten2k128[ind].w[1] &&
         	 res.w[0] < ten2k128[ind].w[0])) {
               break;
             }
           }
           // ind + 20 digits
           ind = ind + 20;
         }

         // at this point ind >= x0; because delta >= 2 on this path, the case
         // ind = x0 can occur only in Case (2) or case (3), when C3 has one
         // digit (q3 = 1) equal to 1 (C3 = 1), e3 is expmin (e3 = expmin),
         // the signs of x * y and z are opposite, and through cancellation
         // the most significant decimal digit in res has the weight
         // 10^(emin-1); however, it is clear that in this case the most
         // significant digit is 9, so the result before rounding is
         // 0.9... * 10^emin
         // Otherwise, ind > x0 because there are non-zero decimal digits in the
         // result with weight of at least 10^emin, and correction for underflow
         //  can be carried out using the round*_*_2_* () routines
         if (x0 == ind) { // the result before rounding is 0.9... * 10^emin
           res.w[1] = 0x0;
           res.w[0] = 0x1;
           is_inexact_gt_midpoint = 1;
         } else if (ind <= 18) { // check that 2 <= ind
           // 2 <= ind <= 18, 1 <= x0 <= 17
           round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp,
         		&is_midpoint_lt_even, &is_midpoint_gt_even,
         		&is_inexact_lt_midpoint,
         		&is_inexact_gt_midpoint);
           if (incr_exp) {
             // R64 = 10^(ind-x0), 1 <= ind - x0 <= ind - 1, 1 <= ind - x0 <= 17
             R64 = ten2k64[ind - x0];
           }
           res.w[1] = 0;
           res.w[0] = R64;
         } else if (ind <= 38) {
           // 19 <= ind <= 38
           P128.w[1] = res.w[1];
           P128.w[0] = res.w[0];
           round128_19_38 (ind, x0, P128, &res, &incr_exp,
         		  &is_midpoint_lt_even, &is_midpoint_gt_even,
         		  &is_inexact_lt_midpoint,
         		  &is_inexact_gt_midpoint);
           if (incr_exp) {
             // R128 = 10^(ind-x0), 1 <= ind - x0 <= ind - 1, 1 <= ind - x0 <= 37
             if (ind - x0 <= 19) { // 1 <= ind - x0 <= 19
               res.w[0] = ten2k64[ind - x0];
               // res.w[1] stays 0
             } else { // 20 <= ind - x0 <= 37
               res.w[0] = ten2k128[ind - x0 - 20].w[0];
               res.w[1] = ten2k128[ind - x0 - 20].w[1];
             }
           }
         }
         // avoid a double rounding error
         if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
             is_midpoint_lt_even) { // double rounding error upward
           // res = res - 1
           res.w[0]--;
           if (res.w[0] == 0xffffffffffffffffull)
             res.w[1]--;
           // Note: a double rounding error upward is not possible; for this
           // the result after the first rounding would have to be 99...95
           // (35 digits in all), possibly followed by a number of zeros; this
           // not possible in Cases (2)-(6) which may get here
           is_midpoint_lt_even = 0;
           is_inexact_lt_midpoint = 1;
         } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
             is_midpoint_gt_even) { // double rounding error downward
           // res = res + 1
           res.w[0]++;
           if (res.w[0] == 0)
             res.w[1]++;
           is_midpoint_gt_even = 0;
           is_inexact_gt_midpoint = 1;
         } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
         	   !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
           // if this second rounding was exact the result may still be
           // inexact because of the first rounding
           if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
             is_inexact_gt_midpoint = 1;
           }
           if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
             is_inexact_lt_midpoint = 1;
           }
         } else if (is_midpoint_gt_even &&
         	   (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
           // pulled up to a midpoint
           is_inexact_lt_midpoint = 1;
           is_inexact_gt_midpoint = 0;
           is_midpoint_lt_even = 0;
           is_midpoint_gt_even = 0;
         } else if (is_midpoint_lt_even &&
         	   (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
           // pulled down to a midpoint
           is_inexact_lt_midpoint = 0;
           is_inexact_gt_midpoint = 1;
           is_midpoint_lt_even = 0;
           is_midpoint_gt_even = 0;
         } else {
           ;
         }
         // adjust exponent
         e3 = e3 + x0;
         if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
             !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
           if (is_midpoint_lt_even0 || is_midpoint_gt_even0 ||
               is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0) {
             is_inexact_lt_midpoint = 1;
           }
         }
       } else {
         ; // not underflow
       }
       // check for inexact result
       if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
           is_midpoint_lt_even || is_midpoint_gt_even) {
         // set the inexact flag
         *pfpsf |= INEXACT_EXCEPTION;
         if (is_tiny)
           *pfpsf |= UNDERFLOW_EXCEPTION;
       }
       // now check for significand = 10^34 (may have resulted from going
       // back to case2_repeat)
       if (res.w[1] == 0x0001ed09bead87c0ull &&
           res.w[0] == 0x378d8e6400000000ull) { // if  res = 10^34
         res.w[1] = 0x0000314dc6448d93ull; // res = 10^33
         res.w[0] = 0x38c15b0a00000000ull;
         e3 = e3 + 1;
       }
       res.w[1] = z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1];
       // check for overflow
       if (rnd_mode == ROUNDING_TO_NEAREST && e3 > expmax) {
         res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
         res.w[0] = 0x0000000000000000ull;
         *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
       }
       if (rnd_mode != ROUNDING_TO_NEAREST) {
         rounding_correction (rnd_mode,
         		     is_inexact_lt_midpoint,
         		     is_inexact_gt_midpoint,
         		     is_midpoint_lt_even, is_midpoint_gt_even,
         		     e3, &res, pfpsf);
       }
       *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
       *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
       *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
       *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
       BID_SWAP128 (res);
       BID_RETURN (res)

     } else {

       // we get here only if delta <= 1 in Cases (2), (3), (4), (5), or (6) and
       // the signs of x*y and z are opposite; in these cases massive
       // cancellation can occur, so it is better to scale either C3 or C4 and
       // to perform the subtraction before rounding; rounding is performed
       // next, depending on the number of decimal digits in the result and on
       // the exponent value
       // Note: overlow is not possible in this case
       // this is similar to Cases (15), (16), and (17)

       if (delta + q4 < q3) { // from Case (6)
         // Case (6) with 0<= delta <= 1 is similar to Cases (15), (16), and
         // (17) if we swap (C3, C4), (q3, q4), (e3, e4), (z_sign, p_sign)
         // and call add_and_round; delta stays positive
         // C4.w[3] = 0 and C4.w[2] = 0, so swap just the low part of C4 with C3
         P128.w[1] = C3.w[1];
         P128.w[0] = C3.w[0];
         C3.w[1] = C4.w[1];
         C3.w[0] = C4.w[0];
         C4.w[1] = P128.w[1];
         C4.w[0] = P128.w[0];
         ind = q3;
         q3 = q4;
         q4 = ind;
         ind = e3;
         e3 = e4;
         e4 = ind;
         tmp_sign = z_sign;
         z_sign = p_sign;
         p_sign = tmp_sign;
       } else { // from Cases (2), (3), (4), (5)
         // In Cases (2), (3), (4), (5) with 0 <= delta <= 1 C3 has to be
         // scaled up by q4 + delta - q3; this is the same as in Cases (15),
         // (16), and (17) if we just change the sign of delta
         delta = -delta;
       }
       add_and_round (q3, q4, e4, delta, p34, z_sign, p_sign, C3, C4,
         	     rnd_mode, &is_midpoint_lt_even,
         	     &is_midpoint_gt_even, &is_inexact_lt_midpoint,
         	     &is_inexact_gt_midpoint, pfpsf, &res);
       *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
       *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
       *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
       *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
       BID_SWAP128 (res);
       BID_RETURN (res)

     }

   } else { // if delta < 0

     delta = -delta;

     if (p34 < q4 && q4 <= delta) { // Case (7)

       // truncate C4 to p34 digits into res
       // x = q4-p34, 1 <= x <= 34 because 35 <= q4 <= 68
       x0 = q4 - p34;
       if (q4 <= 38) {
         P128.w[1] = C4.w[1];
         P128.w[0] = C4.w[0];
         round128_19_38 (q4, x0, P128, &res, &incr_exp,
         		&is_midpoint_lt_even, &is_midpoint_gt_even,
         		&is_inexact_lt_midpoint,
         		&is_inexact_gt_midpoint);
       } else if (q4 <= 57) { // 35 <= q4 <= 57
         P192.w[2] = C4.w[2];
         P192.w[1] = C4.w[1];
         P192.w[0] = C4.w[0];
         round192_39_57 (q4, x0, P192, &R192, &incr_exp,
         		&is_midpoint_lt_even, &is_midpoint_gt_even,
         		&is_inexact_lt_midpoint,
         		&is_inexact_gt_midpoint);
         res.w[0] = R192.w[0];
         res.w[1] = R192.w[1];
       } else { // if (q4 <= 68)
         round256_58_76 (q4, x0, C4, &R256, &incr_exp,
         		&is_midpoint_lt_even, &is_midpoint_gt_even,
         		&is_inexact_lt_midpoint,
         		&is_inexact_gt_midpoint);
         res.w[0] = R256.w[0];
         res.w[1] = R256.w[1];
       }
       e4 = e4 + x0;
       if (incr_exp) {
         e4 = e4 + 1;
       }
       if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
           !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
         // if C4 rounded to p34 digits is exact then the result is inexact,
         // in a way that depends on the signs of x * y and z
         if (p_sign == z_sign) {
           is_inexact_lt_midpoint = 1;
         } else { // if (p_sign != z_sign)
           if (res.w[1] != 0x0000314dc6448d93ull ||
               res.w[0] != 0x38c15b0a00000000ull) { // res != 10^33
             is_inexact_gt_midpoint = 1;
           } else { // res = 10^33 and exact is a special case
             // if C3 < 1/2 ulp then res = 10^33 and is_inexact_gt_midpoint = 1
             // if C3 = 1/2 ulp then res = 10^33 and is_midpoint_lt_even = 1
             // if C3 > 1/2 ulp then res = 10^34-1 and is_inexact_lt_midpoint = 1
             // Note: ulp is really ulp/10 (after borrow which propagates to msd)
             if (delta > p34 + 1) { // C3 < 1/2
               // res = 10^33, unchanged
               is_inexact_gt_midpoint = 1;
             } else { // if (delta == p34 + 1)
               if (q3 <= 19) {
         	if (C3.w[0] < midpoint64[q3 - 1]) { // C3 < 1/2 ulp
         	  // res = 10^33, unchanged
         	  is_inexact_gt_midpoint = 1;
         	} else if (C3.w[0] == midpoint64[q3 - 1]) { // C3 = 1/2 ulp
         	  // res = 10^33, unchanged
         	  is_midpoint_lt_even = 1;
         	} else { // if (C3.w[0] > midpoint64[q3-1]), C3 > 1/2 ulp
         	  res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1
         	  res.w[0] = 0x378d8e63ffffffffull;
         	  e4 = e4 - 1;
         	  is_inexact_lt_midpoint = 1;
         	}
               } else { // if (20 <= q3 <=34)
         	if (C3.w[1] < midpoint128[q3 - 20].w[1] ||
                     (C3.w[1] == midpoint128[q3 - 20].w[1] &&
                     C3.w[0] < midpoint128[q3 - 20].w[0])) { // C3 < 1/2 ulp
         	  // res = 10^33, unchanged
         	  is_inexact_gt_midpoint = 1;
         	} else if (C3.w[1] == midpoint128[q3 - 20].w[1] &&
                     C3.w[0] == midpoint128[q3 - 20].w[0]) { // C3 = 1/2 ulp
         	  // res = 10^33, unchanged
         	  is_midpoint_lt_even = 1;
         	} else { // if (C3 > midpoint128[q3-20]), C3 > 1/2 ulp
         	  res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1
         	  res.w[0] = 0x378d8e63ffffffffull;
         	  e4 = e4 - 1;
         	  is_inexact_lt_midpoint = 1;
         	}
               }
             }
           }
         }
       } else if (is_midpoint_lt_even) {
         if (z_sign != p_sign) {
           // needs correction: res = res - 1
           res.w[0] = res.w[0] - 1;
           if (res.w[0] == 0xffffffffffffffffull)
             res.w[1]--;
           // if it is (10^33-1)*10^e4 then the corect result is
           // (10^34-1)*10(e4-1)
           if (res.w[1] == 0x0000314dc6448d93ull &&
               res.w[0] == 0x38c15b09ffffffffull) {
             res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1
             res.w[0] = 0x378d8e63ffffffffull;
             e4 = e4 - 1;
           }
           is_midpoint_lt_even = 0;
           is_inexact_lt_midpoint = 1;
         } else { // if (z_sign == p_sign)
           is_midpoint_lt_even = 0;
           is_inexact_gt_midpoint = 1;
         }
       } else if (is_midpoint_gt_even) {
         if (z_sign == p_sign) {
           // needs correction: res = res + 1 (cannot cross in the next binade)
           res.w[0] = res.w[0] + 1;
           if (res.w[0] == 0x0000000000000000ull)
             res.w[1]++;
           is_midpoint_gt_even = 0;
           is_inexact_gt_midpoint = 1;
         } else { // if (z_sign != p_sign)
           is_midpoint_gt_even = 0;
           is_inexact_lt_midpoint = 1;
         }
       } else {
         ; // the rounded result is already correct
       }
       // check for overflow
       if (rnd_mode == ROUNDING_TO_NEAREST && e4 > expmax) {
         res.w[1] = p_sign | 0x7800000000000000ull;
         res.w[0] = 0x0000000000000000ull;
         *pfpsf |= (OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
       } else { // no overflow or not RN
         p_exp = ((UINT64) (e4 + 6176) << 49);
         res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1];
       }
       if (rnd_mode != ROUNDING_TO_NEAREST) {
         rounding_correction (rnd_mode,
         		     is_inexact_lt_midpoint,
         		     is_inexact_gt_midpoint,
         		     is_midpoint_lt_even, is_midpoint_gt_even,
         		     e4, &res, pfpsf);
       }
       if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
           is_midpoint_lt_even || is_midpoint_gt_even) {
         // set the inexact flag
         *pfpsf |= INEXACT_EXCEPTION;
       }
       *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
       *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
       *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
       *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
       BID_SWAP128 (res);
       BID_RETURN (res)

     } else if ((q4 <= p34 && p34 <= delta) || // Case (8)
         (q4 <= delta && delta < p34 && p34 < delta + q3) || // Case (9)
         (q4 <= delta && delta + q3 <= p34) || // Case (10)
         (delta < q4 && q4 <= p34 && p34 < delta + q3) || // Case (13)
         (delta < q4 && q4 <= delta + q3 && delta + q3 <= p34) || // Case (14)
         (delta + q3 < q4 && q4 <= p34)) { // Case (18)

       // Case (8) is similar to Case (1), with C3 and C4 swapped
       // Case (9) is similar to Case (2), with C3 and C4 swapped
       // Case (10) is similar to Case (3), with C3 and C4 swapped
       // Case (13) is similar to Case (4), with C3 and C4 swapped
       // Case (14) is similar to Case (5), with C3 and C4 swapped
       // Case (18) is similar to Case (6), with C3 and C4 swapped

       // swap (C3, C4), (q3, q4), (e3, 34), (z_sign, p_sign), (z_exp, p_exp)
       // and go back to delta_ge_zero
       // C4.w[3] = 0 and C4.w[2] = 0, so swap just the low part of C4 with C3
       P128.w[1] = C3.w[1];
       P128.w[0] = C3.w[0];
       C3.w[1] = C4.w[1];
       C3.w[0] = C4.w[0];
       C4.w[1] = P128.w[1];
       C4.w[0] = P128.w[0];
       ind = q3;
       q3 = q4;
       q4 = ind;
       ind = e3;
       e3 = e4;
       e4 = ind;
       tmp_sign = z_sign;
       z_sign = p_sign;
       p_sign = tmp_sign;
       tmp.ui64 = z_exp;
       z_exp = p_exp;
       p_exp = tmp.ui64;
       goto delta_ge_zero;

     } else if ((p34 <= delta && delta < q4 && q4 < delta + q3) || // Case (11)
                (delta < p34 && p34 < q4 && q4 < delta + q3)) { // Case (12)

       // round C3 to nearest to q3 - x0 digits, where x0 = e4 - e3,
       // 1 <= x0 <= q3 - 1 <= p34 - 1
       x0 = e4 - e3; // or x0 = delta + q3 - q4
       if (q3 <= 18) { // 2 <= q3 <= 18
         round64_2_18 (q3, x0, C3.w[0], &R64, &incr_exp,
         	      &is_midpoint_lt_even, &is_midpoint_gt_even,
         	      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
         // C3.w[1] = 0;
         C3.w[0] = R64;
       } else if (q3 <= 38) {
         round128_19_38 (q3, x0, C3, &R128, &incr_exp,
         		&is_midpoint_lt_even, &is_midpoint_gt_even,
         		&is_inexact_lt_midpoint,
         		&is_inexact_gt_midpoint);
         C3.w[1] = R128.w[1];
         C3.w[0] = R128.w[0];
       }
       // the rounded result has q3 - x0 digits
       // we want the exponent to be e4, so if incr_exp = 1 then
       // multiply the rounded result by 10 - it will still fit in 113 bits
       if (incr_exp) {
         // 64 x 128 -> 128
         P128.w[1] = C3.w[1];
         P128.w[0] = C3.w[0];
         __mul_64x128_to_128 (C3, ten2k64[1], P128);
       }
       e3 = e3 + x0; // this is e4
       // now add/subtract the 256-bit C4 and the new (and shorter) 128-bit C3;
       // the result will have the sign of x * y; the exponent is e4
       R256.w[3] = 0;
       R256.w[2] = 0;
       R256.w[1] = C3.w[1];
       R256.w[0] = C3.w[0];
       if (p_sign == z_sign) { // R256 = C4 + R256
         add256 (C4, R256, &R256);
       } else { // if (p_sign != z_sign) { // R256 = C4 - R256
         sub256 (C4, R256, &R256); // the result cannot be pure zero
         // because the result has opposite sign to that of R256 which was
         // rounded, need to change the rounding indicators
         lsb = C4.w[0] & 0x01;
         if (is_inexact_lt_midpoint) {
           is_inexact_lt_midpoint = 0;
           is_inexact_gt_midpoint = 1;
         } else if (is_inexact_gt_midpoint) {
           is_inexact_gt_midpoint = 0;
           is_inexact_lt_midpoint = 1;
         } else if (lsb == 0) {
           if (is_midpoint_lt_even) {
             is_midpoint_lt_even = 0;
             is_midpoint_gt_even = 1;
           } else if (is_midpoint_gt_even) {
             is_midpoint_gt_even = 0;
             is_midpoint_lt_even = 1;
           } else {
             ;
           }
         } else if (lsb == 1) {
           if (is_midpoint_lt_even) {
             // res = res + 1
             R256.w[0]++;
             if (R256.w[0] == 0x0) {
               R256.w[1]++;
               if (R256.w[1] == 0x0) {
         	R256.w[2]++;
         	if (R256.w[2] == 0x0) {
         	  R256.w[3]++;
         	}
               }
             }
             // no check for rounding overflow - R256 was a difference
           } else if (is_midpoint_gt_even) {
             // res = res - 1
             R256.w[0]--;
             if (R256.w[0] == 0xffffffffffffffffull) {
               R256.w[1]--;
               if (R256.w[1] == 0xffffffffffffffffull) {
         	R256.w[2]--;
         	if (R256.w[2] == 0xffffffffffffffffull) {
         	  R256.w[3]--;
         	}
               }
             }
           } else {
             ;
           }
         } else {
           ;
         }
       }
       // determine the number of decimal digits in R256
       ind = nr_digits256 (R256); // ind >= p34
       // if R256 is sum, then ind > p34; if R256 is a difference, then
       // ind >= p34; this means that we can calculate the result rounded to
       // the destination precision, with unbounded exponent, starting from R256
       // and using the indicators from the rounding of C3 to avoid a double
       // rounding error

       if (ind < p34) {
         ;
       } else if (ind == p34) {
         // the result rounded to the destination precision with
         // unbounded exponent
         // is (-1)^p_sign * R256 * 10^e4
         res.w[1] = R256.w[1];
         res.w[0] = R256.w[0];
       } else { // if (ind > p34)
         // if more than P digits, round to nearest to P digits
         // round R256 to p34 digits
         x0 = ind - p34; // 1 <= x0 <= 34 as 35 <= ind <= 68
         // save C3 rounding indicators to help avoid double rounding error
         is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
         is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
         is_midpoint_lt_even0 = is_midpoint_lt_even;
         is_midpoint_gt_even0 = is_midpoint_gt_even;
         // initialize rounding indicators
         is_inexact_lt_midpoint = 0;
         is_inexact_gt_midpoint = 0;
         is_midpoint_lt_even = 0;
         is_midpoint_gt_even = 0;
         // round to p34 digits; the result fits in 113 bits
         if (ind <= 38) {
           P128.w[1] = R256.w[1];
           P128.w[0] = R256.w[0];
           round128_19_38 (ind, x0, P128, &R128, &incr_exp,
         		  &is_midpoint_lt_even, &is_midpoint_gt_even,
         		  &is_inexact_lt_midpoint,
         		  &is_inexact_gt_midpoint);
         } else if (ind <= 57) {
           P192.w[2] = R256.w[2];
           P192.w[1] = R256.w[1];
           P192.w[0] = R256.w[0];
           round192_39_57 (ind, x0, P192, &R192, &incr_exp,
         		  &is_midpoint_lt_even, &is_midpoint_gt_even,
         		  &is_inexact_lt_midpoint,
         		  &is_inexact_gt_midpoint);
           R128.w[1] = R192.w[1];
           R128.w[0] = R192.w[0];
         } else { // if (ind <= 68)
           round256_58_76 (ind, x0, R256, &R256, &incr_exp,
         		  &is_midpoint_lt_even, &is_midpoint_gt_even,
         		  &is_inexact_lt_midpoint,
         		  &is_inexact_gt_midpoint);
           R128.w[1] = R256.w[1];
           R128.w[0] = R256.w[0];
         }
         // the rounded result has p34 = 34 digits
         e4 = e4 + x0 + incr_exp;

         res.w[1] = R128.w[1];
         res.w[0] = R128.w[0];

         // avoid a double rounding error
         if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
             is_midpoint_lt_even) { // double rounding error upward
           // res = res - 1
           res.w[0]--;
           if (res.w[0] == 0xffffffffffffffffull)
             res.w[1]--;
           is_midpoint_lt_even = 0;
           is_inexact_lt_midpoint = 1;
           // Note: a double rounding error upward is not possible; for this
           // the result after the first rounding would have to be 99...95
           // (35 digits in all), possibly followed by a number of zeros; this
           // not possible in Cases (2)-(6) or (15)-(17) which may get here
           // if this is 10^33 - 1 make it 10^34 - 1 and decrement exponent
           if (res.w[1] == 0x0000314dc6448d93ull &&
             res.w[0] == 0x38c15b09ffffffffull) { // 10^33 - 1
             res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1
             res.w[0] = 0x378d8e63ffffffffull;
             e4--;
           }
         } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
             is_midpoint_gt_even) { // double rounding error downward
           // res = res + 1
           res.w[0]++;
           if (res.w[0] == 0)
             res.w[1]++;
           is_midpoint_gt_even = 0;
           is_inexact_gt_midpoint = 1;
         } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
         	   !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
           // if this second rounding was exact the result may still be
           // inexact because of the first rounding
           if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
             is_inexact_gt_midpoint = 1;
           }
           if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
             is_inexact_lt_midpoint = 1;
           }
         } else if (is_midpoint_gt_even &&
         	   (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
           // pulled up to a midpoint
           is_inexact_lt_midpoint = 1;
           is_inexact_gt_midpoint = 0;
           is_midpoint_lt_even = 0;
           is_midpoint_gt_even = 0;
         } else if (is_midpoint_lt_even &&
         	   (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
           // pulled down to a midpoint
           is_inexact_lt_midpoint = 0;
           is_inexact_gt_midpoint = 1;
           is_midpoint_lt_even = 0;
           is_midpoint_gt_even = 0;
         } else {
           ;
         }
       }

       // determine tininess
       if (rnd_mode == ROUNDING_TO_NEAREST) {
         if (e4 < expmin) {
           is_tiny = 1; // for other rounding modes apply correction
         }
       } else {
         // for RM, RP, RZ, RA apply correction in order to determine tininess
         // but do not save the result; apply the correction to
         // (-1)^p_sign * res * 10^0
         P128.w[1] = p_sign | 0x3040000000000000ull | res.w[1];
         P128.w[0] = res.w[0];
         rounding_correction (rnd_mode,
         		     is_inexact_lt_midpoint,
         		     is_inexact_gt_midpoint,
         		     is_midpoint_lt_even, is_midpoint_gt_even,
         		     0, &P128, pfpsf);
         scale = ((P128.w[1] & MASK_EXP) >> 49) - 6176; // -1, 0, or +1
         // the number of digits in the significand is p34 = 34
         if (e4 + scale < expmin) {
           is_tiny = 1;
         }
       }

       // the result rounded to the destination precision with unbounded exponent
       // is (-1)^p_sign * res * 10^e4
       res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1]; // RN
       // res.w[0] unchanged;
       // Note: res is correct only if expmin <= e4 <= expmax
       ind = p34; // the number of decimal digits in the signifcand of res

       // at this point we have the result rounded with unbounded exponent in
       // res and we know its tininess:
       // res = (-1)^p_sign * significand * 10^e4,
       // where q (significand) = ind = p34
       // Note: res is correct only if expmin <= e4 <= expmax

       // check for overflow if RN
       if (rnd_mode == ROUNDING_TO_NEAREST
           && (ind + e4) > (p34 + expmax)) {
         res.w[1] = p_sign | 0x7800000000000000ull;
         res.w[0] = 0x0000000000000000ull;
         *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
         *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
         *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
         *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
         *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
         BID_SWAP128 (res);
         BID_RETURN (res)
       } // else not overflow or not RN, so continue

       // from this point on this is similar to the last part of the computation
       // for Cases (15), (16), (17)

       // if (e4 >= expmin) we have the result rounded with bounded exponent
       if (e4 < expmin) {
         x0 = expmin - e4; // x0 >= 1; the number of digits to chop off of res
         // where the result rounded [at most] once is
         //   (-1)^p_sign * significand_res * 10^e4

         // avoid double rounding error
         is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
         is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
         is_midpoint_lt_even0 = is_midpoint_lt_even;
         is_midpoint_gt_even0 = is_midpoint_gt_even;
         is_inexact_lt_midpoint = 0;
         is_inexact_gt_midpoint = 0;
         is_midpoint_lt_even = 0;
         is_midpoint_gt_even = 0;

         if (x0 > ind) {
           // nothing is left of res when moving the decimal point left x0 digits
           is_inexact_lt_midpoint = 1;
           res.w[1] = p_sign | 0x0000000000000000ull;
           res.w[0] = 0x0000000000000000ull;
           e4 = expmin;
         } else if (x0 == ind) { // 1 <= x0 = ind <= p34 = 34
           // this is <, =, or > 1/2 ulp
           // compare the ind-digit value in the significand of res with
           // 1/2 ulp = 5*10^(ind-1), i.e. determine whether it is
           // less than, equal to, or greater than 1/2 ulp (significand of res)
           R128.w[1] = res.w[1] & MASK_COEFF;
           R128.w[0] = res.w[0];
           if (ind <= 19) {
             if (R128.w[0] < midpoint64[ind - 1]) { // < 1/2 ulp
               lt_half_ulp = 1;
               is_inexact_lt_midpoint = 1;
             } else if (R128.w[0] == midpoint64[ind - 1]) { // = 1/2 ulp
               eq_half_ulp = 1;
               is_midpoint_gt_even = 1;
             } else { // > 1/2 ulp
               gt_half_ulp = 1;
               is_inexact_gt_midpoint = 1;
             }
           } else { // if (ind <= 38)
             if (R128.w[1] < midpoint128[ind - 20].w[1] ||
                 (R128.w[1] == midpoint128[ind - 20].w[1] &&
                 R128.w[0] < midpoint128[ind - 20].w[0])) { // < 1/2 ulp
               lt_half_ulp = 1;
               is_inexact_lt_midpoint = 1;
             } else if (R128.w[1] == midpoint128[ind - 20].w[1] &&
                 R128.w[0] == midpoint128[ind - 20].w[0]) { // = 1/2 ulp
               eq_half_ulp = 1;
               is_midpoint_gt_even = 1;
             } else { // > 1/2 ulp
               gt_half_ulp = 1;
               is_inexact_gt_midpoint = 1;
             }
           }
           if (lt_half_ulp || eq_half_ulp) {
             // res = +0.0 * 10^expmin
             res.w[1] = 0x0000000000000000ull;
             res.w[0] = 0x0000000000000000ull;
           } else { // if (gt_half_ulp)
             // res = +1 * 10^expmin
             res.w[1] = 0x0000000000000000ull;
             res.w[0] = 0x0000000000000001ull;
           }
           res.w[1] = p_sign | res.w[1];
           e4 = expmin;
         } else { // if (1 <= x0 <= ind - 1 <= 33)
           // round the ind-digit result to ind - x0 digits

           if (ind <= 18) { // 2 <= ind <= 18
             round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp,
         		  &is_midpoint_lt_even, &is_midpoint_gt_even,
         		  &is_inexact_lt_midpoint,
         		  &is_inexact_gt_midpoint);
             res.w[1] = 0x0;
             res.w[0] = R64;
           } else if (ind <= 38) {
             P128.w[1] = res.w[1] & MASK_COEFF;
             P128.w[0] = res.w[0];
             round128_19_38 (ind, x0, P128, &res, &incr_exp,
         		    &is_midpoint_lt_even, &is_midpoint_gt_even,
         		    &is_inexact_lt_midpoint,
         		    &is_inexact_gt_midpoint);
           }
           e4 = e4 + x0; // expmin
           // we want the exponent to be expmin, so if incr_exp = 1 then
           // multiply the rounded result by 10 - it will still fit in 113 bits
           if (incr_exp) {
             // 64 x 128 -> 128
             P128.w[1] = res.w[1] & MASK_COEFF;
             P128.w[0] = res.w[0];
             __mul_64x128_to_128 (res, ten2k64[1], P128);
           }
           res.w[1] =
             p_sign | ((UINT64) (e4 + 6176) << 49) | (res.
         					     w[1] & MASK_COEFF);
           // avoid a double rounding error
           if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
                 is_midpoint_lt_even) { // double rounding error upward
             // res = res - 1
             res.w[0]--;
             if (res.w[0] == 0xffffffffffffffffull)
               res.w[1]--;
             // Note: a double rounding error upward is not possible; for this
             // the result after the first rounding would have to be 99...95
             // (35 digits in all), possibly followed by a number of zeros; this
             // not possible in this underflow case
             is_midpoint_lt_even = 0;
             is_inexact_lt_midpoint = 1;
           } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
                 is_midpoint_gt_even) { // double rounding error downward
             // res = res + 1
             res.w[0]++;
             if (res.w[0] == 0)
               res.w[1]++;
             is_midpoint_gt_even = 0;
             is_inexact_gt_midpoint = 1;
           } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
         	     !is_inexact_lt_midpoint
         	     && !is_inexact_gt_midpoint) {
             // if this second rounding was exact the result may still be
             // inexact because of the first rounding
             if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
               is_inexact_gt_midpoint = 1;
             }
             if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
               is_inexact_lt_midpoint = 1;
             }
           } else if (is_midpoint_gt_even &&
         	     (is_inexact_gt_midpoint0
         	      || is_midpoint_lt_even0)) {
             // pulled up to a midpoint
             is_inexact_lt_midpoint = 1;
             is_inexact_gt_midpoint = 0;
             is_midpoint_lt_even = 0;
             is_midpoint_gt_even = 0;
           } else if (is_midpoint_lt_even &&
         	     (is_inexact_lt_midpoint0
         	      || is_midpoint_gt_even0)) {
             // pulled down to a midpoint
             is_inexact_lt_midpoint = 0;
             is_inexact_gt_midpoint = 1;
             is_midpoint_lt_even = 0;
             is_midpoint_gt_even = 0;
           } else {
             ;
           }
         }
       }
       // res contains the correct result
       // apply correction if not rounding to nearest
       if (rnd_mode != ROUNDING_TO_NEAREST) {
         rounding_correction (rnd_mode,
         		     is_inexact_lt_midpoint,
         		     is_inexact_gt_midpoint,
         		     is_midpoint_lt_even, is_midpoint_gt_even,
         		     e4, &res, pfpsf);
       }
       if (is_midpoint_lt_even || is_midpoint_gt_even ||
           is_inexact_lt_midpoint || is_inexact_gt_midpoint) {
         // set the inexact flag
         *pfpsf |= INEXACT_EXCEPTION;
         if (is_tiny)
           *pfpsf |= UNDERFLOW_EXCEPTION;
       }
       *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
       *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
       *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
       *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
       BID_SWAP128 (res);
       BID_RETURN (res)

     } else if ((p34 <= delta && delta + q3 <= q4) || // Case (15)
         (delta < p34 && p34 < delta + q3 && delta + q3 <= q4) || //Case (16)
         (delta + q3 <= p34 && p34 < q4)) { // Case (17)

       // calculate first the result rounded to the destination precision, with
       // unbounded exponent

       add_and_round (q3, q4, e4, delta, p34, z_sign, p_sign, C3, C4,
               rnd_mode, &is_midpoint_lt_even,
               &is_midpoint_gt_even, &is_inexact_lt_midpoint,
               &is_inexact_gt_midpoint, pfpsf, &res);
       *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
       *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
       *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
       *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
       BID_SWAP128 (res);
       BID_RETURN (res)

     } else {
       ;
     }

   } // end if delta < 0

   *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
   *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
   *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
   *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
   BID_SWAP128 (res);
   BID_RETURN (res)

 }


 #if DECIMAL_CALL_BY_REFERENCE
 void
 bid128_fma (UINT128 * pres, UINT128 * px, UINT128 * py, UINT128 * pz
             _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
             _EXC_INFO_PARAM) {
   UINT128 x = *px, y = *py, z = *pz;
 #if !DECIMAL_GLOBAL_ROUNDING
   unsigned int rnd_mode = *prnd_mode;
 #endif
 #else
 UINT128
 bid128_fma (UINT128 x, UINT128 y, UINT128 z
             _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
             _EXC_INFO_PARAM) {
 #endif
   int is_midpoint_lt_even, is_midpoint_gt_even,
     is_inexact_lt_midpoint, is_inexact_gt_midpoint;
   UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };

 #if DECIMAL_CALL_BY_REFERENCE
   bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
         	  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
         	  &res, &x, &y, &z
         	  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
         	  _EXC_INFO_ARG);
 #else
   res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
         		&is_inexact_lt_midpoint,
         		&is_inexact_gt_midpoint, x, y,
         		z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
         		_EXC_INFO_ARG);
 #endif
   BID_RETURN (res);
 }


 #if DECIMAL_CALL_BY_REFERENCE
 void
 bid128ddd_fma (UINT128 * pres, UINT64 * px, UINT64 * py, UINT64 * pz
                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
                _EXC_INFO_PARAM) {
   UINT64 x = *px, y = *py, z = *pz;
 #if !DECIMAL_GLOBAL_ROUNDING
   unsigned int rnd_mode = *prnd_mode;
 #endif
 #else
 UINT128
 bid128ddd_fma (UINT64 x, UINT64 y, UINT64 z
                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
                _EXC_INFO_PARAM) {
 #endif
   int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
     is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
   UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
   UINT128 x1, y1, z1;

 #if DECIMAL_CALL_BY_REFERENCE
   bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
         	  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
         	  &res, &x1, &y1, &z1
         	  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
         	  _EXC_INFO_ARG);
 #else
   x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
         		&is_inexact_lt_midpoint,
         		&is_inexact_gt_midpoint, x1, y1,
         		z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
         		_EXC_INFO_ARG);
 #endif
   BID_RETURN (res);
 }


 #if DECIMAL_CALL_BY_REFERENCE
 void
 bid128ddq_fma (UINT128 * pres, UINT64 * px, UINT64 * py, UINT128 * pz
                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
                _EXC_INFO_PARAM) {
   UINT64 x = *px, y = *py;
   UINT128 z = *pz;
 #if !DECIMAL_GLOBAL_ROUNDING
   unsigned int rnd_mode = *prnd_mode;
 #endif
 #else
 UINT128
 bid128ddq_fma (UINT64 x, UINT64 y, UINT128 z
                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
                _EXC_INFO_PARAM) {
 #endif
   int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
     is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
   UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
   UINT128 x1, y1;

 #if DECIMAL_CALL_BY_REFERENCE
   bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
         	  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
         	  &res, &x1, &y1, &z
         	  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
         	  _EXC_INFO_ARG);
 #else
   x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
         		&is_inexact_lt_midpoint,
         		&is_inexact_gt_midpoint, x1, y1,
         		z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
         		_EXC_INFO_ARG);
 #endif
   BID_RETURN (res);
 }


 #if DECIMAL_CALL_BY_REFERENCE
 void
 bid128dqd_fma (UINT128 * pres, UINT64 * px, UINT128 * py, UINT64 * pz
                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
                _EXC_INFO_PARAM) {
   UINT64 x = *px, z = *pz;
 #if !DECIMAL_GLOBAL_ROUNDING
   unsigned int rnd_mode = *prnd_mode;
 #endif
 #else
 UINT128
 bid128dqd_fma (UINT64 x, UINT128 y, UINT64 z
                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
                _EXC_INFO_PARAM) {
 #endif
   int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
     is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
   UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
   UINT128 x1, z1;

 #if DECIMAL_CALL_BY_REFERENCE
   bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
         	  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
         	  &res, &x1, py, &z1
         	  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
         	  _EXC_INFO_ARG);
 #else
   x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
         		&is_inexact_lt_midpoint,
         		&is_inexact_gt_midpoint, x1, y,
         		z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
         		_EXC_INFO_ARG);
 #endif
   BID_RETURN (res);
 }


 #if DECIMAL_CALL_BY_REFERENCE
 void
 bid128dqq_fma (UINT128 * pres, UINT64 * px, UINT128 * py, UINT128 * pz
                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
                _EXC_INFO_PARAM) {
   UINT64 x = *px;
 #if !DECIMAL_GLOBAL_ROUNDING
   unsigned int rnd_mode = *prnd_mode;
 #endif
 #else
 UINT128
 bid128dqq_fma (UINT64 x, UINT128 y, UINT128 z
                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
                _EXC_INFO_PARAM) {
 #endif
   int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
     is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
   UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
   UINT128 x1;

 #if DECIMAL_CALL_BY_REFERENCE
   bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
         	  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
         	  &res, &x1, py, pz
         	  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
         	  _EXC_INFO_ARG);
 #else
   x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
         		&is_inexact_lt_midpoint,
         		&is_inexact_gt_midpoint, x1, y,
         		z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
         		_EXC_INFO_ARG);
 #endif
   BID_RETURN (res);
 }


 #if DECIMAL_CALL_BY_REFERENCE
 void
 bid128qdd_fma (UINT128 * pres, UINT128 * px, UINT64 * py, UINT64 * pz
                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
                _EXC_INFO_PARAM) {
   UINT64 y = *py, z = *pz;
 #if !DECIMAL_GLOBAL_ROUNDING
   unsigned int rnd_mode = *prnd_mode;
 #endif
 #else
 UINT128
 bid128qdd_fma (UINT128 x, UINT64 y, UINT64 z
                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
                _EXC_INFO_PARAM) {
 #endif
   int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
     is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
   UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
   UINT128 y1, z1;

 #if DECIMAL_CALL_BY_REFERENCE
   bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
         	  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
         	  &res, px, &y1, &z1
         	  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
         	  _EXC_INFO_ARG);
 #else
   y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
         		&is_inexact_lt_midpoint,
         		&is_inexact_gt_midpoint, x, y1,
         		z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
         		_EXC_INFO_ARG);
 #endif
   BID_RETURN (res);
 }


 #if DECIMAL_CALL_BY_REFERENCE
 void
 bid128qdq_fma (UINT128 * pres, UINT128 * px, UINT64 * py, UINT128 * pz
                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
                _EXC_INFO_PARAM) {
   UINT64 y = *py;
 #if !DECIMAL_GLOBAL_ROUNDING
   unsigned int rnd_mode = *prnd_mode;
 #endif
 #else
 UINT128
 bid128qdq_fma (UINT128 x, UINT64 y, UINT128 z
                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
                _EXC_INFO_PARAM) {
 #endif
   int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
     is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
   UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
   UINT128 y1;

 #if DECIMAL_CALL_BY_REFERENCE
   bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
         	  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
         	  &res, px, &y1, pz
         	  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
         	  _EXC_INFO_ARG);
 #else
   y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
         		&is_inexact_lt_midpoint,
         		&is_inexact_gt_midpoint, x, y1,
         		z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
         		_EXC_INFO_ARG);
 #endif
   BID_RETURN (res);
 }


 #if DECIMAL_CALL_BY_REFERENCE
 void
 bid128qqd_fma (UINT128 * pres, UINT128 * px, UINT128 * py, UINT64 * pz
                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
                _EXC_INFO_PARAM) {
   UINT64 z = *pz;
 #if !DECIMAL_GLOBAL_ROUNDING
   unsigned int rnd_mode = *prnd_mode;
 #endif
 #else
 UINT128
 bid128qqd_fma (UINT128 x, UINT128 y, UINT64 z
                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
                _EXC_INFO_PARAM) {
 #endif
   int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
     is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
   UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
   UINT128 z1;

 #if DECIMAL_CALL_BY_REFERENCE
   bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
         	  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
         	  &res, px, py, &z1
         	  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
         	  _EXC_INFO_ARG);
 #else
   z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
         		&is_inexact_lt_midpoint,
         		&is_inexact_gt_midpoint, x, y,
         		z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
         		_EXC_INFO_ARG);
 #endif
   BID_RETURN (res);
 }

 // Note: bid128qqq_fma is represented by bid128_fma

 // Note: bid64ddd_fma is represented by bid64_fma

 #if DECIMAL_CALL_BY_REFERENCE
 void
 bid64ddq_fma (UINT64 * pres, UINT64 * px, UINT64 * py, UINT128 * pz
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
               _EXC_INFO_PARAM) {
   UINT64 x = *px, y = *py;
 #if !DECIMAL_GLOBAL_ROUNDING
   unsigned int rnd_mode = *prnd_mode;
 #endif
 #else
 UINT64
 bid64ddq_fma (UINT64 x, UINT64 y, UINT128 z
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
               _EXC_INFO_PARAM) {
 #endif
   UINT64 res1 = 0xbaddbaddbaddbaddull;
   UINT128 x1, y1;

 #if DECIMAL_CALL_BY_REFERENCE
   bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   bid64qqq_fma (&res1, &x1, &y1, pz
         	_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
         	_EXC_INFO_ARG);
 #else
   x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   res1 = bid64qqq_fma (x1, y1, z
         	       _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
         	       _EXC_INFO_ARG);
 #endif
   BID_RETURN (res1);
 }


 #if DECIMAL_CALL_BY_REFERENCE
 void
 bid64dqd_fma (UINT64 * pres, UINT64 * px, UINT128 * py, UINT64 * pz
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
               _EXC_INFO_PARAM) {
   UINT64 x = *px, z = *pz;
 #if !DECIMAL_GLOBAL_ROUNDING
   unsigned int rnd_mode = *prnd_mode;
 #endif
 #else
 UINT64
 bid64dqd_fma (UINT64 x, UINT128 y, UINT64 z
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
               _EXC_INFO_PARAM) {
 #endif
   UINT64 res1 = 0xbaddbaddbaddbaddull;
   UINT128 x1, z1;

 #if DECIMAL_CALL_BY_REFERENCE
   bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   bid64qqq_fma (&res1, &x1, py, &z1
         	_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
         	_EXC_INFO_ARG);
 #else
   x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   res1 = bid64qqq_fma (x1, y, z1
         	       _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
         	       _EXC_INFO_ARG);
 #endif
   BID_RETURN (res1);
 }


 #if DECIMAL_CALL_BY_REFERENCE
 void
 bid64dqq_fma (UINT64 * pres, UINT64 * px, UINT128 * py, UINT128 * pz
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
               _EXC_INFO_PARAM) {
   UINT64 x = *px;
 #if !DECIMAL_GLOBAL_ROUNDING
   unsigned int rnd_mode = *prnd_mode;
 #endif
 #else
 UINT64
 bid64dqq_fma (UINT64 x, UINT128 y, UINT128 z
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
               _EXC_INFO_PARAM) {
 #endif
   UINT64 res1 = 0xbaddbaddbaddbaddull;
   UINT128 x1;

 #if DECIMAL_CALL_BY_REFERENCE
   bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   bid64qqq_fma (&res1, &x1, py, pz
         	_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
         	_EXC_INFO_ARG);
 #else
   x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   res1 = bid64qqq_fma (x1, y, z
         	       _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
         	       _EXC_INFO_ARG);
 #endif
   BID_RETURN (res1);
 }


 #if DECIMAL_CALL_BY_REFERENCE
 void
 bid64qdd_fma (UINT64 * pres, UINT128 * px, UINT64 * py, UINT64 * pz
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
               _EXC_INFO_PARAM) {
   UINT64 y = *py, z = *pz;
 #if !DECIMAL_GLOBAL_ROUNDING
   unsigned int rnd_mode = *prnd_mode;
 #endif
 #else
 UINT64
 bid64qdd_fma (UINT128 x, UINT64 y, UINT64 z
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
               _EXC_INFO_PARAM) {
 #endif
   UINT64 res1 = 0xbaddbaddbaddbaddull;
   UINT128 y1, z1;

 #if DECIMAL_CALL_BY_REFERENCE
   bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   bid64qqq_fma (&res1, px, &y1, &z1
         	_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
         	_EXC_INFO_ARG);
 #else
   y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   res1 = bid64qqq_fma (x, y1, z1
         	       _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
         	       _EXC_INFO_ARG);
 #endif
   BID_RETURN (res1);
 }


 #if DECIMAL_CALL_BY_REFERENCE
 void
 bid64qdq_fma (UINT64 * pres, UINT128 * px, UINT64 * py, UINT128 * pz
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
               _EXC_INFO_PARAM) {
   UINT64 y = *py;
 #if !DECIMAL_GLOBAL_ROUNDING
   unsigned int rnd_mode = *prnd_mode;
 #endif
 #else
 UINT64
 bid64qdq_fma (UINT128 x, UINT64 y, UINT128 z
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
               _EXC_INFO_PARAM) {
 #endif
   UINT64 res1 = 0xbaddbaddbaddbaddull;
   UINT128 y1;

 #if DECIMAL_CALL_BY_REFERENCE
   bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   bid64qqq_fma (&res1, px, &y1, pz
         	_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
         	_EXC_INFO_ARG);
 #else
   y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   res1 = bid64qqq_fma (x, y1, z
         	       _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
         	       _EXC_INFO_ARG);
 #endif
   BID_RETURN (res1);
 }


 #if DECIMAL_CALL_BY_REFERENCE
 void
 bid64qqd_fma (UINT64 * pres, UINT128 * px, UINT128 * py, UINT64 * pz
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
               _EXC_INFO_PARAM) {
   UINT64 z = *pz;
 #if !DECIMAL_GLOBAL_ROUNDING
   unsigned int rnd_mode = *prnd_mode;
 #endif
 #else
 UINT64
 bid64qqd_fma (UINT128 x, UINT128 y, UINT64 z
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
               _EXC_INFO_PARAM) {
 #endif
   UINT64 res1 = 0xbaddbaddbaddbaddull;
   UINT128 z1;

 #if DECIMAL_CALL_BY_REFERENCE
   bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   bid64qqq_fma (&res1, px, py, &z1
         	_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
         	_EXC_INFO_ARG);
 #else
   z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   res1 = bid64qqq_fma (x, y, z1
         	       _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
         	       _EXC_INFO_ARG);
 #endif
   BID_RETURN (res1);
 }


 #if DECIMAL_CALL_BY_REFERENCE
 void
 bid64qqq_fma (UINT64 * pres, UINT128 * px, UINT128 * py, UINT128 * pz
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
               _EXC_INFO_PARAM) {
   UINT128 x = *px, y = *py, z = *pz;
 #if !DECIMAL_GLOBAL_ROUNDING
   unsigned int rnd_mode = *prnd_mode;
 #endif
 #else
 UINT64
 bid64qqq_fma (UINT128 x, UINT128 y, UINT128 z
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
               _EXC_INFO_PARAM) {
 #endif
   int is_midpoint_lt_even0 = 0, is_midpoint_gt_even0 = 0,
     is_inexact_lt_midpoint0 = 0, is_inexact_gt_midpoint0 = 0;
   int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
     is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
   int incr_exp;
   UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
   UINT128 res128 = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
   UINT64 res1 = 0xbaddbaddbaddbaddull;
   unsigned int save_fpsf; // needed because of the call to bid128_ext_fma
   UINT64 sign;
   UINT64 exp;
   int unbexp;
   UINT128 C;
   BID_UI64DOUBLE tmp;
   int nr_bits;
   int q, x0;
   int scale;
   int lt_half_ulp = 0, eq_half_ulp = 0;

   // Note: for rounding modes other than RN or RA, the result can be obtained
   // by rounding first to BID128 and then to BID64

   save_fpsf = *pfpsf; // sticky bits - caller value must be preserved
   *pfpsf = 0;

 #if DECIMAL_CALL_BY_REFERENCE
   bid128_ext_fma (&is_midpoint_lt_even0, &is_midpoint_gt_even0,
         	  &is_inexact_lt_midpoint0, &is_inexact_gt_midpoint0,
         	  &res, &x, &y, &z
         	  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
         	  _EXC_INFO_ARG);
 #else
   res = bid128_ext_fma (&is_midpoint_lt_even0, &is_midpoint_gt_even0,
         		&is_inexact_lt_midpoint0,
         		&is_inexact_gt_midpoint0, x, y,
         		z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
         		_EXC_INFO_ARG);
 #endif

   if ((rnd_mode == ROUNDING_DOWN) || (rnd_mode == ROUNDING_UP) ||
       (rnd_mode == ROUNDING_TO_ZERO) || // no double rounding error is possible
       ((res.w[HIGH_128W] & MASK_NAN) == MASK_NAN) || //res=QNaN (cannot be SNaN)
       ((res.w[HIGH_128W] & MASK_ANY_INF) == MASK_INF)) { // result is infinity
 #if DECIMAL_CALL_BY_REFERENCE
     bid128_to_bid64 (&res1, &res _RND_MODE_ARG _EXC_FLAGS_ARG);
 #else
     res1 = bid128_to_bid64 (res _RND_MODE_ARG _EXC_FLAGS_ARG);
 #endif
     // determine the unbiased exponent of the result
     unbexp = ((res1 >> 53) & 0x3ff) - 398;

     // if subnormal, res1  must have exp = -398
     // if tiny and inexact set underflow and inexact status flags
     if (!((res1 & MASK_NAN) == MASK_NAN) &&	// res1 not NaN
         (unbexp == -398)
         && ((res1 & MASK_BINARY_SIG1) < 1000000000000000ull)
         && (is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0
             || is_midpoint_lt_even0 || is_midpoint_gt_even0)) {
       // set the inexact flag and the underflow flag
       *pfpsf |= (INEXACT_EXCEPTION | UNDERFLOW_EXCEPTION);
     } else if (is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0 ||
                is_midpoint_lt_even0 || is_midpoint_gt_even0) {
       // set the inexact flag and the underflow flag
       *pfpsf |= INEXACT_EXCEPTION;
     }

     *pfpsf |= save_fpsf;
     BID_RETURN (res1);
   } // else continue, and use rounding to nearest to round to 16 digits

   // at this point the result is rounded to nearest (even or away) to 34 digits
   // (or less if exact), and it is zero or finite non-zero canonical [sub]normal
   sign = res.w[HIGH_128W] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
   exp = res.w[HIGH_128W] & MASK_EXP; // biased and shifted left 49 bits
   unbexp = (exp >> 49) - 6176;
   C.w[1] = res.w[HIGH_128W] & MASK_COEFF;
   C.w[0] = res.w[LOW_128W];

   if ((C.w[1] == 0x0 && C.w[0] == 0x0) ||	// result is zero
       (unbexp <= (-398 - 35)) || (unbexp >= (369 + 16))) {
       // clear under/overflow
 #if DECIMAL_CALL_BY_REFERENCE
     bid128_to_bid64 (&res1, &res _RND_MODE_ARG _EXC_FLAGS_ARG);
 #else
     res1 = bid128_to_bid64 (res _RND_MODE_ARG _EXC_FLAGS_ARG);
 #endif
     *pfpsf |= save_fpsf;
     BID_RETURN (res1);
   } // else continue

   // -398 - 34 <= unbexp <= 369 + 15
   if (rnd_mode == ROUNDING_TIES_AWAY) {
     // apply correction, if needed, to make the result rounded to nearest-even
     if (is_midpoint_gt_even) {
       // res = res - 1
       res1--; // res1 is now even
     } // else the result is already correctly rounded to nearest-even
   }
   // at this point the result is finite, non-zero canonical normal or subnormal,
   // and in most cases overflow or underflow will not occur

   // determine the number of digits q in the result
   // q = nr. of decimal digits in x
   // determine first the nr. of bits in x
   if (C.w[1] == 0) {
     if (C.w[0] >= 0x0020000000000000ull) { // x >= 2^53
       // split the 64-bit value in two 32-bit halves to avoid rounding errors
       if (C.w[0] >= 0x0000000100000000ull) { // x >= 2^32
         tmp.d = (double) (C.w[0] >> 32); // exact conversion
         nr_bits =
           33 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
       } else { // x < 2^32
         tmp.d = (double) (C.w[0]); // exact conversion
         nr_bits =
           1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
       }
     } else { // if x < 2^53
       tmp.d = (double) C.w[0]; // exact conversion
       nr_bits =
         1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
     }
   } else { // C.w[1] != 0 => nr. bits = 64 + nr_bits (C.w[1])
     tmp.d = (double) C.w[1]; // exact conversion
     nr_bits =
       65 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
   }
   q = nr_digits[nr_bits - 1].digits;
   if (q == 0) {
     q = nr_digits[nr_bits - 1].digits1;
     if (C.w[1] > nr_digits[nr_bits - 1].threshold_hi ||
         (C.w[1] == nr_digits[nr_bits - 1].threshold_hi &&
          C.w[0] >= nr_digits[nr_bits - 1].threshold_lo))
       q++;
   }
   // if q > 16, round to nearest even to 16 digits (but for underflow it may
   // have to be truncated even more)
   if (q > 16) {
     x0 = q - 16;
     if (q <= 18) {
       round64_2_18 (q, x0, C.w[0], &res1, &incr_exp,
         	    &is_midpoint_lt_even, &is_midpoint_gt_even,
         	    &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
     } else { // 19 <= q <= 34
       round128_19_38 (q, x0, C, &res128, &incr_exp,
         	      &is_midpoint_lt_even, &is_midpoint_gt_even,
         	      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
       res1 = res128.w[0]; // the result fits in 64 bits
     }
     unbexp = unbexp + x0;
     if (incr_exp)
       unbexp++;
     q = 16; // need to set in case denormalization is necessary
   } else {
     // the result does not require a second rounding (and it must have
     // been exact in the first rounding, since q <= 16)
     res1 = C.w[0];
   }

   // avoid a double rounding error
   if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
       is_midpoint_lt_even) { // double rounding error upward
     // res = res - 1
     res1--; // res1 becomes odd
     is_midpoint_lt_even = 0;
     is_inexact_lt_midpoint = 1;
     if (res1 == 0x00038d7ea4c67fffull) { // 10^15 - 1
       res1 = 0x002386f26fc0ffffull; // 10^16 - 1
       unbexp--;
     }
   } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
       is_midpoint_gt_even) { // double rounding error downward
     // res = res + 1
     res1++; // res1 becomes odd (so it cannot be 10^16)
     is_midpoint_gt_even = 0;
     is_inexact_gt_midpoint = 1;
   } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
              !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
     // if this second rounding was exact the result may still be
     // inexact because of the first rounding
     if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
       is_inexact_gt_midpoint = 1;
     }
     if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
       is_inexact_lt_midpoint = 1;
     }
   } else if (is_midpoint_gt_even &&
              (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
     // pulled up to a midpoint
     is_inexact_lt_midpoint = 1;
     is_inexact_gt_midpoint = 0;
     is_midpoint_lt_even = 0;
     is_midpoint_gt_even = 0;
   } else if (is_midpoint_lt_even &&
              (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
     // pulled down to a midpoint
     is_inexact_lt_midpoint = 0;
     is_inexact_gt_midpoint = 1;
     is_midpoint_lt_even = 0;
     is_midpoint_gt_even = 0;
   } else {
     ;
   }
   // this is the result rounded correctly to nearest even, with unbounded exp.

   // check for overflow
   if (q + unbexp > P16 + expmax16) {
     res1 = sign | 0x7800000000000000ull;
     *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
     *pfpsf |= save_fpsf;
     BID_RETURN (res1)
   } else if (unbexp > expmax16) { // q + unbexp <= P16 + expmax16
     // not overflow; the result must be exact, and we can multiply res1 by
     // 10^(unbexp - expmax16) and the product will fit in 16 decimal digits
     scale = unbexp - expmax16;
     res1 = res1 * ten2k64[scale]; // res1 * 10^scale
     unbexp = expmax16; // unbexp - scale
   } else {
     ; // continue
   }

   // check for underflow
   if (q + unbexp < P16 + expmin16) {
     if (unbexp < expmin16) {
       // we must truncate more of res
       x0 = expmin16 - unbexp; // x0 >= 1
       is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
       is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
       is_midpoint_lt_even0 = is_midpoint_lt_even;
       is_midpoint_gt_even0 = is_midpoint_gt_even;
       is_inexact_lt_midpoint = 0;
       is_inexact_gt_midpoint = 0;
       is_midpoint_lt_even = 0;
       is_midpoint_gt_even = 0;
       // the number of decimal digits in res1 is q
       if (x0 < q) { // 1 <= x0 <= q-1 => round res to q - x0 digits
         // 2 <= q <= 16, 1 <= x0 <= 15
         round64_2_18 (q, x0, res1, &res1, &incr_exp,
         	      &is_midpoint_lt_even, &is_midpoint_gt_even,
         	      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
         if (incr_exp) {
           // res1 = 10^(q-x0), 1 <= q - x0 <= q - 1, 1 <= q - x0 <= 15
           res1 = ten2k64[q - x0];
         }
         unbexp = unbexp + x0; // expmin16
       } else if (x0 == q) {
         // the second rounding is for 0.d(0)d(1)...d(q-1) * 10^emin
         // determine relationship with 1/2 ulp
         // q <= 16
         if (res1 < midpoint64[q - 1]) { // < 1/2 ulp
           lt_half_ulp = 1;
           is_inexact_lt_midpoint = 1;
         } else if (res1 == midpoint64[q - 1]) { // = 1/2 ulp
           eq_half_ulp = 1;
           is_midpoint_gt_even = 1;
         } else { // > 1/2 ulp
           // gt_half_ulp = 1;
           is_inexact_gt_midpoint = 1;
         }
         if (lt_half_ulp || eq_half_ulp) {
           // res = +0.0 * 10^expmin16
           res1 = 0x0000000000000000ull;
         } else { // if (gt_half_ulp)
           // res = +1 * 10^expmin16
           res1 = 0x0000000000000001ull;
         }
         unbexp = expmin16;
       } else { // if (x0 > q)
         // the second rounding is for 0.0...d(0)d(1)...d(q-1) * 10^emin
         res1 = 0x0000000000000000ull;
         unbexp = expmin16;
         is_inexact_lt_midpoint = 1;
       }
       // avoid a double rounding error
       if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
           is_midpoint_lt_even) { // double rounding error upward
         // res = res - 1
         res1--; // res1 becomes odd
         is_midpoint_lt_even = 0;
         is_inexact_lt_midpoint = 1;
       } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
           is_midpoint_gt_even) { // double rounding error downward
         // res = res + 1
         res1++; // res1 becomes odd
         is_midpoint_gt_even = 0;
         is_inexact_gt_midpoint = 1;
       } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
         	 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
         // if this rounding was exact the result may still be
         // inexact because of the previous roundings
         if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
           is_inexact_gt_midpoint = 1;
         }
         if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
           is_inexact_lt_midpoint = 1;
         }
       } else if (is_midpoint_gt_even &&
         	 (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
         // pulled up to a midpoint
         is_inexact_lt_midpoint = 1;
         is_inexact_gt_midpoint = 0;
         is_midpoint_lt_even = 0;
         is_midpoint_gt_even = 0;
       } else if (is_midpoint_lt_even &&
         	 (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
         // pulled down to a midpoint
         is_inexact_lt_midpoint = 0;
         is_inexact_gt_midpoint = 1;
         is_midpoint_lt_even = 0;
         is_midpoint_gt_even = 0;
       } else {
         ;
       }
     }
     // else if unbexp >= emin then q < P (because q + unbexp < P16 + expmin16)
     // and the result is tiny and exact

     // check for inexact result
     if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
         is_midpoint_lt_even || is_midpoint_gt_even ||
         is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0 ||
         is_midpoint_lt_even0 || is_midpoint_gt_even0) {
       // set the inexact flag and the underflow flag
       *pfpsf |= (INEXACT_EXCEPTION | UNDERFLOW_EXCEPTION);
     }
   } else if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
              is_midpoint_lt_even || is_midpoint_gt_even) {
     *pfpsf |= INEXACT_EXCEPTION;
   }
   // this is the result rounded correctly to nearest, with bounded exponent

   if (rnd_mode == ROUNDING_TIES_AWAY && is_midpoint_gt_even) { // correction
     // res = res + 1
     res1++; // res1 is now odd
   } // else the result is already correct

   // assemble the result
   if (res1 < 0x0020000000000000ull) { // res < 2^53
     res1 = sign | ((UINT64) (unbexp + 398) << 53) | res1;
   } else { // res1 >= 2^53
     res1 = sign | MASK_STEERING_BITS |
       ((UINT64) (unbexp + 398) << 51) | (res1 & MASK_BINARY_SIG2);
   }
   *pfpsf |= save_fpsf;
   BID_RETURN (res1);
 }