blob: f0a1477981f268dd58e2e8329f811cd0c2d9f70f [file] [log] [blame]
/*---------------------------------------------------------------------------*
* himul32.h *
* *
* Copyright 2007, 2008 Nuance Communciations, Inc. *
* *
* Licensed under the Apache License, Version 2.0 (the 'License'); *
* you may not use this file except in compliance with the License. *
* *
* You may obtain a copy of the License at *
* http://www.apache.org/licenses/LICENSE-2.0 *
* *
* Unless required by applicable law or agreed to in writing, software *
* distributed under the License is distributed on an 'AS IS' BASIS, *
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
* See the License for the specific language governing permissions and *
* limitations under the License. *
* *
*---------------------------------------------------------------------------*/
/*
////////////////////////////////////////////////////////////////////////////
//
// FILE: himul32.cpp
//
// CREATED: 11-September-99
//
// DESCRIPTION: A multiplier that returns the most-significant 32 bits of the
// 64-bit product of two signed 32-bit integers
//
//
//
//
// MODIFICATIONS:
// Revision history log
VSS revision history. Do not edit by hand.
$NoKeywords: $
*/
/* Test _PPC_ here, not PPC: VxWorks defines PPC in vxcpu.h. */
#if defined(_PPC_)
/*
 * himul32, PowerPC assembler variant: multiplies factor1 by factor2 with the
 * mulhw instruction, which leaves the high-order 32 bits of the product in
 * r3, the return register.
 *
 * NOTE(review): the return type is spelled int32 and the second parameter
 * type asr_int32_T, whereas every other himul32 variant in this file uses
 * asr_int32_t for both -- confirm these names exist before enabling this
 * branch.
 * NOTE(review): the "% reg" / "reg" lines look like compiler-specific
 * asm-macro operand declarations -- confirm against the toolchain manual.
 */
asm PINLINE int32 himul32(asr_int32_t factor1, asr_int32_T factor2)
{
% reg factor1;
reg factor2;
mulhw r3, factor1, factor2 # place the high order 32 bits of the product in the return register r3
}
#else
/******************************************************************
himul32 returns the most-significant 32 bits of the 64-bit
product of its two signed 32-bit integer arguments.
In other words, it's the exact value of the mathematical expression
floor( (factor1 * factor2) / 2**32 )
This is a platform-independent definition that needs to be
implemented in platform-specific ways.
Parameters:
factor1 -- first signed 32 bit integer
factor2 -- second signed 32 bit integer
Returns:
the most-significant 32 bits of the multiplication results
*********************************************************************/
#if COMPILER == C_MICROSOFT
#if TARGET_CPU == CPU_I86
/*
 * himul32, Microsoft compiler / x86 variant: returns the high 32 bits of the
 * signed 64-bit product factor1 * factor2, computed with inline assembly.
 */
PINLINE asr_int32_t himul32(asr_int32_t factor1, asr_int32_t factor2)
{
asr_int32_t retval;
/*
// The x86 imul instruction, given a single 32-bit operand, computes
// the signed 64-bit product of register EAX and that operand, into
// the register pair EDX:EAX. So we move the first factor into EAX,
// IMUL by the second factor, and then copy the high 32 bits (left in
// EDX) into the local variable retval, which is handed back through
// the ordinary C return statement below.
*/
__asm {
mov eax, factor1
imul factor2
mov retval, edx
}
return retval;
}
#else /* TARGET_CPU != CPU_I86 */
/*
 * himul32, Microsoft compiler / non-x86 variant: returns the high 32 bits of
 * the signed 64-bit product factor1 * factor2, i.e.
 * floor((factor1 * factor2) / 2**32).
 *
 * Parameters:
 *   factor1 -- first signed 32-bit integer
 *   factor2 -- second signed 32-bit integer
 * Returns:
 *   the most-significant 32 bits of the 64-bit product
 *
 * The upper half is extracted with a shift instead of overlaying a
 * { lo, hi } struct on the 64-bit value: the overlay only yields the upper
 * half when the low word is stored first (little-endian layout), whereas the
 * shift is independent of byte order. The Microsoft compiler propagates the
 * sign bit when right-shifting a negative signed value, so negative products
 * round toward minus infinity as the contract requires.
 */
PINLINE asr_int32_t himul32(asr_int32_t factor1, asr_int32_t factor2)
{
    /* Widen before multiplying so the full 64-bit product is retained. */
    __int64 product = (__int64)factor1 * (__int64)factor2;

    return (asr_int32_t)(product >> 32);
}
#endif /* TARGET_CPU == CPU_I86 */
#else /* ~ COMPILER != C_MICROSOFT */
/*** ANSI C ***/
/*
 * himul32, portable variant: returns the high 32 bits of the signed 64-bit
 * product factor1 * factor2, i.e. floor((factor1 * factor2) / 2**32), using
 * only 32-bit unsigned arithmetic.
 *
 * Parameters:
 *   factor1 -- first signed 32-bit integer
 *   factor2 -- second signed 32-bit integer
 * Returns:
 *   the most-significant 32 bits of the 64-bit product
 *
 * Method: multiply the magnitudes as 16-bit halves (schoolbook
 * multiplication), assemble the 64-bit result as the pair hi:lo, then negate
 * that pair if exactly one of the factors was negative.
 */
PINLINE asr_int32_t himul32(asr_int32_t factor1, asr_int32_t factor2)
{
    asr_uint32_t x = (asr_uint32_t)factor1;
    asr_uint32_t y = (asr_uint32_t)factor2;
    asr_uint32_t xhi, xlo, yhi, ylo;
    asr_uint32_t hi, lo, mid;
    asr_uint32_t oldlo, carry;
    int sign = 0;

    /*
    // Replace each negative factor by its magnitude and track the sign of
    // the product. The negation is done on the unsigned copy (0 - x wraps
    // modulo 2**32), so the most negative input maps to 2**31 without ever
    // negating a signed value, which would overflow for that input.
    */
    if (factor1 < 0)
    {
        x = (asr_uint32_t)0 - x;
        sign = 1;
    }
    if (factor2 < 0)
    {
        y = (asr_uint32_t)0 - y;
        sign = 1 - sign;
    }

    xhi = x >> 16;    /* <= 2**15 */
    xlo = x & 0xffff; /* <  2**16 */
    yhi = y >> 16;    /* <= 2**15 */
    ylo = y & 0xffff; /* <  2**16 */

    lo = xlo * ylo;
    /*
    // xhi <= 2**15 and ylo <= 2**16-1, so
    //     xhi * ylo <= 2**31 - 2**15.
    // Ditto for yhi * xlo, so their sum is
    //     <= 2**32 - 2**16, and so the next line can't overflow.
    */
    mid = xhi * ylo + yhi * xlo;
    hi = xhi * yhi;

    /*
    // Now add the low part of mid to the high part of lo, and the
    // high part of mid to the low part of hi:
    //                     xxxxxxxx xxxxxxxx   lo
    //            xxxxxxxx xxxxxxxx            mid
    //   xxxxxxxx xxxxxxxx                     hi
    //   -----------------------------------
    //                     xxxxxxxx xxxxxxxx   lo
    //   xxxxxxxx xxxxxxxx                     hi
    // Note that folding mid into lo can cause a carry. An old trick
    // for portable carry-detection applies: if a and b are unsigned,
    // their sum overflows if and only if it's less than a (or b; can
    // check either one).
    */
    oldlo = lo;
    lo += mid << 16;
    carry = lo < oldlo;
    hi += carry + (mid >> 16);

    if (sign)
    {
        /*
        // Result must be negated, which is the same as taking the
        // complement and adding 1. So there's a carry out of the low
        // half if and only if it's 0 now.
        */
        hi = ~hi;
        hi += lo == 0;
    }
    return (asr_int32_t)hi;
}
#endif /* ~ COMPILER == C_MICROSOFT */
#endif