blob: 9fa7c8e55a688fbfd651417dd257682ca5c12f5f [file] [log] [blame]
/* Copyright (c) 2002 Michael Stumpf <mistumpf@de.pepperl-fuchs.com>
Copyright (c) 2006 Dmitry Xmelkov
Copyright (c) 2008 Ruud v Gessel
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of the copyright holders nor the names of
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE. */
#if !defined(__AVR_TINY__)
#include "fp32def.h"
#include "asmdef.h"
/* double sqrt (double);
Square root function.
*/
FUNCTION sqrt
.L_nf: brne .L_pk ; NaN, return as is
brtc .L_pk ; sqrt(+Inf) --> +Inf
.L_nan: rjmp _U(__fp_nan)
.L_pk: rjmp _U(__fp_mpack)
ENTRY sqrt
; split and check arg.
rcall _U(__fp_splitA)
brcs .L_nf ; !isfinite(A)
tst rA3
breq .L_pk ; return 0 with original sign
brts .L_nan ; sqrt(negative) --> NaN
; exponent bias
subi rA3, 127
sbc rB3, rB3 ; exponent high byte
; normalize, if A is subnormal
sbrs rA2, 7
rcall _U(__fp_norm2)
#define msk0 r0
#define msk1 r1
#define msk2 rBE
clr msk0 ; msk1=R1 already 0
ldi msk2, 0x60 ; Initial rotation mask =
; 01100000.00000000.00000000
ldi rB2, 0xa0
X_movw rB0, msk0 ; Initial developing root =
; 10100000.00000000.00000000
/* TODO: Now the Avr-libs does not have an infrastructure to build and
*test automaticaly* with both OPTIMIZE_SPEED definitions. So the
one variant is enabled today only.
*/
#if 1 /* defined(OPTIMIZE_SPEED) && OPTIMIZE_SPEED */
;** Optimized for speed (9 code words larger than size optimized,
; 67 less cycles in average)
subi rA2, 0x80
lsr rB3
ror rA3 ; Divide exponent by 2, C==>exponent was odd
brcc 1f ; Jump for even exponent in argument
subi rA2, lo8(-0x40) ; Initial remainder for odd exponent.
; Loop for upper 23 bits
.Loop: lsl rA0
rol rA1
rol rA2 ; Shift left remainder argument
brcs 2f ; C --> Bit is always 1 (rA * 2 gave C)
1: cp rB0, rA0
cpc rB1, rA1
cpc rB2, rA2 ; Does test value fit?
brcc 3f ; NC --> nope, bit is 0
2: sub rA0, rB0
sbc rA1, rB1
sbc rA2, rB2 ; Prepare remainder argument for next bits
or rB0, msk0
or rB1, msk1
or rB2, msk2 ; Set developing bit to 1
3: lsr msk2
ror msk1
ror msk0 ; Shift right mask, C --> end loop
eor rB0, msk0
eor rB1, msk1
eor rB2, msk2 ; Shift right test bit in developing root
brcc .Loop ; Develop 23 bits of the sqrt
; Loop for bit 0 and rounding
.Loop1: lsl rA0
rol rA1
rol rA2 ; Shift left remainder argument
brcs 4f ; C--> Last bits always 1
cp rB0, rA0
cpc rB1, rA1
cpc rB2, rA2 ; Test for last bit 1
brcc 5f ; Nope, stays the same
4: sbc rA0, rB0 ; MUST BE SBC !!
sbc rA1, rB1
sbc rA2, rB2 ; Prepare remainder argument for next bit
add rB0, msk0
adc rB1, msk1
adc rB2, msk1 ; Add 1 to result
5: com msk2 ; ZF if second time
brne .Loop1 ; 1 for last bit, 1 for rounding
#else /* vs. to OPTIMIZE_SPEED */
;** Optimized for size (9 code words smaller than speed optimized,
; 67 more cycles in average)
#define tv rAE
clr tv ; Test value for end of loop
subi rA2, 0x40 ; Initial remainder for odd exponent
lsr rB3
ror rA3 ; Divide exponent by 2, C==>exponent was odd
brcs 3f ; Jump for odd exponent in argument
subi rA2, 0x40 ; Initial remainder for even exponent, C=0
; Loop for all 24 bits
.Loop: brcc 2f ; NC --> nope, bit is 0
cp msk2, tv ; Only needed to get the proper rounding
; for ffffff
sbc rA0, rB0
sbc rA1, rB1
sbc rA2, rB2 ; Prepare remainder argument for next bits
or rB0, msk0
or rB1, msk1
or rB2, msk2 ; Set developing bit to 1
2: lsr msk2
ror msk1
ror msk0 ; Shift right mask, C --> end loop
rol tv ; Bit 1 set if end of loop
eor rB0, msk0
eor rB1, msk1
eor rB2, msk2 ; Shift right test bit in developing root
3: lsl rA0
rol rA1
rol rA2 ; Shift left remainder argument (C used
; at .Loop)
brcs 4f
cp rB0, rA0
cpc rB1, rA1
cpc rB2, rA2
4: sbrs tv, 1
rjmp .Loop
; Rounding
adc rB0, msk1
adc rB1, msk1
adc rB2, msk1 ; Rounded mantissa ready (msk1=0)
#endif /* !OPTIMIZE_SPEED */
X_movw rA0, rB0
mov rA2, rB2 ; Copy to rA
subi rA3, lo8(-127) ; exponent bias
lsl rA2
lsr rA3
ror rA2
ret
ENDFUNC
#endif /* !defined(__AVR_TINY__) */