| /* |
| Copyright (c) 2014, Intel Corporation |
| All rights reserved. |
| |
| Redistribution and use in source and binary forms, with or without |
| modification, are permitted provided that the following conditions are met: |
| |
| * Redistributions of source code must retain the above copyright notice, |
| * this list of conditions and the following disclaimer. |
| |
| * Redistributions in binary form must reproduce the above copyright notice, |
| * this list of conditions and the following disclaimer in the documentation |
| * and/or other materials provided with the distribution. |
| |
| * Neither the name of Intel Corporation nor the names of its contributors |
| * may be used to endorse or promote products derived from this software |
| * without specific prior written permission. |
| |
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND |
| ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
| WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
| DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR |
| ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
| (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
| LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON |
| ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
| SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| |
| /******************************************************************************/ |
| // ALGORITHM DESCRIPTION |
| // --------------------- |
| // |
| // Description: |
| // Let K = 64 (table size). |
| // e^x = 2^(x/log(2)) = 2^n * T[j] * (1 + P(y)) |
| // where |
| // x = m*log(2)/K + y, y in [-log(2)/K..log(2)/K] |
| // m = n*K + j, m,n,j - signed integer, j in [-K/2..K/2] |
| // values of 2^(j/K) are tabulated as T[j] = T_hi[j] * (1 + T_lo[j]). |
| // |
| // P(y) is a minimax polynomial approximation of exp(y)-1 |
| // on the small interval [-log(2)/K..log(2)/K] (coefficients were calculated by Maple V). |
| // |
| // To avoid problems with arithmetic overflow and underflow, |
| // value of 2^n is safely computed as 2^n1 * 2^n2 where n1 in [-BIAS/2..BIAS/2] |
| // where BIAS is a value of exponent bias. |
| // |
| // Special cases: |
| // exp(NaN) = NaN |
| // exp(+INF) = +INF |
| // exp(-INF) = 0 |
| // exp(x) = 1 for subnormals |
| // for finite argument, only exp(0)=1 is exact |
| // For IEEE double |
| // if x > 709.782712893383973096 then exp(x) overflows |
| // if x < -745.133219101941108420 then exp(x) underflows |
| // |
| /******************************************************************************/ |
| |
| #include <private/bionic_asm.h> |
| # -- Begin exp |
| ENTRY(exp) |
# Computes e^x for the double-precision argument in %xmm0 and returns the
# result in %xmm0 (see the algorithm description at the top of the file).
# Registers written below: %eax, %ecx, %edx, %r8 and %xmm0-%xmm7.
# A frame of 24 bytes is reserved on the stack:
#   status slot at 0(%rsp): code 14 or 15 is stored, never read back here
#   argument copy at 8(%rsp): raw bits of x for the special-case checks
#   result slot at 16(%rsp): the special-case result is stored and reloaded
| # parameter 1: %xmm0 |
| ..B1.1: |
| ..___tag_value_exp.1: |
| subq $24, %rsp |
| ..___tag_value_exp.3: |
# Keep x in memory so the special-case paths can inspect its raw bits.
| movsd %xmm0, 8(%rsp) |
| ..B1.2: |
# Broadcast x to both lanes and preload constants:
# %xmm1 = cv+0 (64/ln2), %xmm6 = Shifter (1.5*2^52),
# %xmm2 and %xmm3 = cv+16 and cv+32 (high and low parts of ln2/64).
| unpcklpd %xmm0, %xmm0 |
| movapd cv(%rip), %xmm1 |
| movapd Shifter(%rip), %xmm6 |
| movapd 16+cv(%rip), %xmm2 |
| movapd 32+cv(%rip), %xmm3 |
# Range check on the top 16 bits of x with the sign cleared (exponent plus
# 4 mantissa bits). The sign bit of %edx ends up set when that field is
# above 16527 (0x408f, so |x| >= 1024) or below 15504 (0x3c90, so
# |x| < 2^-54); both cases leave the main path.
| pextrw $3, %xmm0, %eax |
| andl $32767, %eax |
| movl $16527, %edx |
| subl %eax, %edx |
| subl $15504, %eax |
| orl %eax, %edx |
| cmpl $-2147483648, %edx |
| jae .L_2TAG_PACKET_0.0.2 |
# Argument reduction: adding and then subtracting Shifter rounds x*64/ln2
# to an integer m. %xmm7 keeps the shifted sum, whose low 32 bits hold m;
# %xmm1 holds m as a double.
| mulpd %xmm0, %xmm1 |
| addpd %xmm6, %xmm1 |
| movapd %xmm1, %xmm7 |
| subpd %xmm6, %xmm1 |
# Reduced argument y = x - m*(ln2/64), subtracted in two steps (high part
# here, low part a few instructions later). The polynomial coefficients at
# cv+64 and cv+80 are loaded in between.
| mulpd %xmm1, %xmm2 |
| movapd 64+cv(%rip), %xmm4 |
| mulpd %xmm1, %xmm3 |
| movapd 80+cv(%rip), %xmm5 |
| subpd %xmm2, %xmm0 |
# Split m: %ecx = (m & 63) * 16 is the byte offset of table entry j,
# %eax = %edx = m >> 6 (arithmetic shift) is the binary exponent n.
| movd %xmm7, %eax |
| movl %eax, %ecx |
| andl $63, %ecx |
| shll $4, %ecx |
| sarl $6, %eax |
| movl %eax, %edx |
# Build the exponent field of 2^n in %xmm7: clear the 6 index bits, add
# 1023*64 (the exponent bias scaled by 64) and shift left by 46 so that
# n+1023 lands at bit 52.
| movdqa mmask(%rip), %xmm6 |
| pand %xmm6, %xmm7 |
| movdqa bias(%rip), %xmm6 |
| paddq %xmm6, %xmm7 |
| psllq $46, %xmm7 |
# Second reduction step: %xmm0 now holds y in both lanes.
| subpd %xmm3, %xmm0 |
# Fetch table entry j: low lane = T_lo[j], high lane = fraction bits of T_hi[j].
| lea Tbl_addr(%rip), %r8 |
| movapd (%rcx,%r8), %xmm2 |
# Polynomial evaluation; the two lanes work on different terms.
# %xmm6 = y^2, %xmm5 = cv+80 + y*(cv+64), %xmm1 (low) = y + T_lo[j].
# After the mulsd the low lane of %xmm0 is y^5 and the high lane is y^3.
| mulpd %xmm0, %xmm4 |
| movapd %xmm0, %xmm6 |
| movapd %xmm0, %xmm1 |
| mulpd %xmm6, %xmm6 |
| mulpd %xmm6, %xmm0 |
| addpd %xmm4, %xmm5 |
| mulsd %xmm6, %xmm0 |
| mulpd 48+cv(%rip), %xmm6 |
| addsd %xmm2, %xmm1 |
# Copy T_hi[j] into both lanes; the orpd below merges in the exponent of
# 2^n so that %xmm2 = 2^n * T_hi[j].
| unpckhpd %xmm2, %xmm2 |
| mulpd %xmm5, %xmm0 |
| addsd %xmm0, %xmm1 |
| orpd %xmm7, %xmm2 |
# Sum the high-lane terms, the low-lane terms and (cv+48)*y^2 into the low
# lane of %xmm0.
| unpckhpd %xmm0, %xmm0 |
| addsd %xmm1, %xmm0 |
| addsd %xmm6, %xmm0 |
# Fast path only when -894 <= n <= 1022 (one unsigned compare of n+894
# against 1916 covers both bounds).
| addl $894, %edx |
| cmpl $1916, %edx |
| ja .L_2TAG_PACKET_1.0.2 |
# Result = T*p + T, with T = 2^n * T_hi[j] and p the sum held in %xmm0.
| mulsd %xmm2, %xmm0 |
| addsd %xmm2, %xmm0 |
| jmp ..B1.5 |
| .L_2TAG_PACKET_1.0.2: |
# Slow path for n outside [-894, 1022]. The scale 2^n is applied in two
# factors, with n1 = n >> 1 (compare the n1/n2 note in the file header).
| xorpd %xmm3, %xmm3 |
| movapd ALLONES(%rip), %xmm4 |
# Shifting ALLONES left by -1022 - n gives a mask that clears that many
# low bits. A count above 63 (including a negative count, which psllq
# sees as a large unsigned value) gives an all-zero mask.
| movl $-1022, %edx |
| subl %eax, %edx |
| movd %edx, %xmm5 |
| psllq %xmm5, %xmm4 |
| movl %eax, %ecx |
| sarl $1, %eax |
# Place n1 at the exponent position of %xmm3: insert it as word 3
# (bit 48), then shift left by 4 to reach bit 52.
| pinsrw $3, %eax, %xmm3 |
| movapd ebias(%rip), %xmm6 |
| psllq $4, %xmm3 |
# Lower the exponent of T (low lane) by n1, then form T*p with the
# rescaled T.
| psubd %xmm3, %xmm2 |
| mulsd %xmm2, %xmm0 |
# More than 52 low bits would be cleared (n < -1074): use PACKET_2.
| cmpl $52, %edx |
| jg .L_2TAG_PACKET_2.0.2 |
# Split the rescaled T into a masked head (%xmm4) and the remainder
# (%xmm2); the remainder is folded into the tail in %xmm0.
# Adding ebias turns %xmm3 into the double 2^n1.
| andpd %xmm2, %xmm4 |
| paddd %xmm6, %xmm3 |
| subsd %xmm4, %xmm2 |
| addsd %xmm2, %xmm0 |
# Overflow side (n >= 1023) is finished in PACKET_3.
| cmpl $1023, %ecx |
| jge .L_2TAG_PACKET_3.0.2 |
# Underflow side: when the shift count is zero and the sign bit of the
# tail is clear, PACKET_4 finishes with a plain add and multiply.
| pextrw $3, %xmm0, %ecx |
| andl $32768, %ecx |
| orl %ecx, %edx |
| cmpl $0, %edx |
| je .L_2TAG_PACKET_4.0.2 |
# Keep the tail in %xmm6, then compute (tail + head) * 2^n1. A zero
# exponent field in the result (mask 32752 = 0x7ff0) means the result is
# subnormal or zero and is redone in PACKET_5.
| movapd %xmm0, %xmm6 |
| addsd %xmm4, %xmm0 |
| mulsd %xmm3, %xmm0 |
| pextrw $3, %xmm0, %ecx |
| andl $32752, %ecx |
| cmpl $0, %ecx |
| je .L_2TAG_PACKET_5.0.2 |
| jmp ..B1.5 |
| .L_2TAG_PACKET_5.0.2: |
# Subnormal result: scale tail and head separately by 2^n1, then combine
# their bit patterns with integer arithmetic. The tail magnitude is
# negated (xor with an all-ones mask, then add 1) when the sign bits of
# tail and head differ, and the head bits are added last.
| mulsd %xmm3, %xmm6 |
| mulsd %xmm3, %xmm4 |
| movq %xmm6, %xmm0 |
| pxor %xmm4, %xmm6 |
| psrad $31, %xmm6 |
| pshufd $85, %xmm6, %xmm6 |
| psllq $1, %xmm0 |
| psrlq $1, %xmm0 |
| pxor %xmm6, %xmm0 |
| psrlq $63, %xmm6 |
| paddq %xmm6, %xmm0 |
| paddq %xmm4, %xmm0 |
# Store code 15 in the status slot; nothing in this file reads it back.
# NOTE(review): presumably an underflow error code kept from the original
# library error-reporting scheme - confirm before relying on it.
| movl $15, (%rsp) |
| jmp .L_2TAG_PACKET_6.0.2 |
| .L_2TAG_PACKET_4.0.2: |
# Result = (tail + head) * 2^n1, no further checks.
| addsd %xmm4, %xmm0 |
| mulsd %xmm3, %xmm0 |
| jmp ..B1.5 |
| .L_2TAG_PACKET_3.0.2: |
# Overflow side: result = (tail + head) * 2^n1. An all-ones exponent
# field (0x7ff0) means the result is not finite, so go store code 14.
| addsd %xmm4, %xmm0 |
| mulsd %xmm3, %xmm0 |
| pextrw $3, %xmm0, %ecx |
| andl $32752, %ecx |
| cmpl $32752, %ecx |
| jnb .L_2TAG_PACKET_7.0.2 |
| jmp ..B1.5 |
| .L_2TAG_PACKET_2.0.2: |
# Deep underflow (n < -1074): result = (T*p + T) * 2^n1 with the rescaled
# T, followed by code 15 in the status slot.
| paddd %xmm6, %xmm3 |
| addpd %xmm2, %xmm0 |
| mulsd %xmm3, %xmm0 |
| movl $15, (%rsp) |
| jmp .L_2TAG_PACKET_6.0.2 |
| .L_2TAG_PACKET_8.0.2: |
# Reached from PACKET_0 with %eax = high dword of x with the sign cleared,
# already known to be at least 0x40900000 (|x| >= 1024).
# A value of 2146435072 (0x7ff00000) or more means Inf or NaN.
| cmpl $2146435072, %eax |
| jae .L_2TAG_PACKET_9.0.2 |
# Finite: reload the signed high dword; the unsigned compare against
# 0x80000000 detects a negative x.
| movl 12(%rsp), %eax |
| cmpl $-2147483648, %eax |
| jae .L_2TAG_PACKET_10.0.2 |
# Large positive x: the result is produced as XMAX*XMAX.
| movsd XMAX(%rip), %xmm0 |
| mulsd %xmm0, %xmm0 |
| .L_2TAG_PACKET_7.0.2: |
# Store code 14 in the status slot (also reached from PACKET_3).
# NOTE(review): presumably an overflow error code - confirm.
| movl $14, (%rsp) |
| jmp .L_2TAG_PACKET_6.0.2 |
| .L_2TAG_PACKET_10.0.2: |
# Large negative x: the result is produced as XMIN*XMIN, with code 15.
| movsd XMIN(%rip), %xmm0 |
| mulsd %xmm0, %xmm0 |
| movl $15, (%rsp) |
| jmp .L_2TAG_PACKET_6.0.2 |
| .L_2TAG_PACKET_9.0.2: |
# Inf or NaN. It is a NaN when the sign-cleared high dword exceeds
# 0x7ff00000 or the low dword is non-zero.
| movl 8(%rsp), %edx |
| cmpl $2146435072, %eax |
| ja .L_2TAG_PACKET_11.0.2 |
| cmpl $0, %edx |
| jne .L_2TAG_PACKET_11.0.2 |
# Infinity: a signed high dword equal to 0x7ff00000 is +Inf and returns
# INF; otherwise the argument is -Inf and ZERO is returned.
| movl 12(%rsp), %eax |
| cmpl $2146435072, %eax |
| jne .L_2TAG_PACKET_12.0.2 |
| movsd INF(%rip), %xmm0 |
| jmp ..B1.5 |
| .L_2TAG_PACKET_12.0.2: |
| movsd ZERO(%rip), %xmm0 |
| jmp ..B1.5 |
| .L_2TAG_PACKET_11.0.2: |
# NaN argument: return x + x.
| movsd 8(%rsp), %xmm0 |
| addsd %xmm0, %xmm0 |
| jmp ..B1.5 |
| .L_2TAG_PACKET_0.0.2: |
# Exponent outside the main-path range. A sign-cleared high dword of at
# least 1083179008 (0x40900000, |x| >= 1024) goes to PACKET_8; otherwise
# |x| < 2^-54 and the result is x + 1.0.
| movl 12(%rsp), %eax |
| andl $2147483647, %eax |
| cmpl $1083179008, %eax |
| jae .L_2TAG_PACKET_8.0.2 |
| movsd 8(%rsp), %xmm0 |
| addsd ONE_val(%rip), %xmm0 |
| jmp ..B1.5 |
| .L_2TAG_PACKET_6.0.2: |
# Common tail of the paths that stored a status code: the result is
# written to 16(%rsp) and read straight back into %xmm0.
| movq %xmm0, 16(%rsp) |
| ..B1.3: |
| movq 16(%rsp), %xmm0 |
| .L_2TAG_PACKET_13.0.2: |
| ..B1.5: |
# Release the 24-byte frame and return with the result in %xmm0.
| addq $24, %rsp |
| ..___tag_value_exp.4: |
| ret |
| ..___tag_value_exp.5: |
| END(exp) |
| # -- End exp |
| .section .rodata, "a" |
| .align 16 |
| .align 16 |
# Packed-double constants for exp, 96 bytes, addressed by byte offset.
# Each double is stored as two .long values, low dword first.
#   cv+0  : 64/ln(2) in both lanes (0x40571547652b82fe)
#   cv+16 : leading bits of ln(2)/64 in both lanes (0x3f862e42fefa0000)
#   cv+32 : low-order remainder of ln(2)/64 in both lanes
#   cv+48 : approximately 0.5 in both lanes (0x3fdffffffffffffe)
#   cv+64 : polynomial coefficients, approximately 1/720 (low lane) and 1/24 (high lane)
#   cv+80 : polynomial coefficients, approximately 1/120 (low lane) and 1/6 (high lane)
| cv: |
| .long 1697350398 |
| .long 1079448903 |
| .long 1697350398 |
| .long 1079448903 |
| .long 4277796864 |
| .long 1065758274 |
| .long 4277796864 |
| .long 1065758274 |
| .long 3164486458 |
| .long 1025308570 |
| .long 3164486458 |
| .long 1025308570 |
| .long 4294967294 |
| .long 1071644671 |
| .long 4294967294 |
| .long 1071644671 |
| .long 3811088480 |
| .long 1062650204 |
| .long 1432067621 |
| .long 1067799893 |
| .long 3230715663 |
| .long 1065423125 |
| .long 1431604129 |
| .long 1069897045 |
| .type cv,@object |
| .size cv,96 |
# Rounding constant 0x4338000000000000 (1.5 * 2^52) in both lanes. exp adds
# and then subtracts it so that the low bits of the intermediate sum hold
# the rounded integer.
| .align 16 |
| Shifter: |
| .long 0 |
| .long 1127743488 |
| .long 0 |
| .long 1127743488 |
| .type Shifter,@object |
| .size Shifter,16 |
# Mask 0x00000000ffffffc0 per 64-bit lane: keeps the low 32 bits except the
# 6 table-index bits. exp applies it with pand before building the exponent.
| .align 16 |
| mmask: |
| .long 4294967232 |
| .long 0 |
| .long 4294967232 |
| .long 0 |
| .type mmask,@object |
| .size mmask,16 |
# Value 65472 = 1023 * 64 per 64-bit lane: the double exponent bias scaled
# by the table size, added by exp before the left shift by 46.
| .align 16 |
| bias: |
| .long 65472 |
| .long 0 |
| .long 65472 |
| .long 0 |
| .type bias,@object |
| .size bias,16 |
# Lookup table read by exp with a 16-byte stride: 64 entries, 1024 bytes.
# Each entry is four .long values forming two 64-bit items:
#   first item  : a small double, loaded into the low lane and added to the
#                 polynomial sum (T_lo[j] in the file header)
#   second item : fraction bits with a zero exponent field; exp ORs the
#                 exponent of 2^n into it (T_hi[j] in the file header)
# Entry 0 is all zero bits.
| .align 16 |
| Tbl_addr: |
| .long 0 |
| .long 0 |
| .long 0 |
| .long 0 |
| .long 235107661 |
| .long 1018002367 |
| .long 1048019040 |
| .long 11418 |
| .long 896005651 |
| .long 1015861842 |
| .long 3541402996 |
| .long 22960 |
| .long 1642514529 |
| .long 1012987726 |
| .long 410360776 |
| .long 34629 |
| .long 1568897900 |
| .long 1016568486 |
| .long 1828292879 |
| .long 46424 |
| .long 1882168529 |
| .long 1010744893 |
| .long 852742562 |
| .long 58348 |
| .long 509852888 |
| .long 1017336174 |
| .long 3490863952 |
| .long 70401 |
| .long 653277307 |
| .long 1017431380 |
| .long 2930322911 |
| .long 82586 |
| .long 1649557430 |
| .long 1017729363 |
| .long 1014845818 |
| .long 94904 |
| .long 1058231231 |
| .long 1015777676 |
| .long 3949972341 |
| .long 107355 |
| .long 1044000607 |
| .long 1016786167 |
| .long 828946858 |
| .long 119943 |
| .long 1151779725 |
| .long 1015705409 |
| .long 2288159958 |
| .long 132667 |
| .long 3819481236 |
| .long 1016499965 |
| .long 1853186616 |
| .long 145530 |
| .long 2552227826 |
| .long 1015039787 |
| .long 1709341917 |
| .long 158533 |
| .long 1829350193 |
| .long 1015216097 |
| .long 4112506593 |
| .long 171677 |
| .long 1913391795 |
| .long 1015756674 |
| .long 2799960843 |
| .long 184965 |
| .long 1303423926 |
| .long 1015238005 |
| .long 171030293 |
| .long 198398 |
| .long 1574172746 |
| .long 1016061241 |
| .long 2992903935 |
| .long 211976 |
| .long 3424156969 |
| .long 1017196428 |
| .long 926591434 |
| .long 225703 |
| .long 1938513547 |
| .long 1017631273 |
| .long 887463926 |
| .long 239579 |
| .long 2804567149 |
| .long 1015390024 |
| .long 1276261410 |
| .long 253606 |
| .long 631083525 |
| .long 1017690182 |
| .long 569847337 |
| .long 267786 |
| .long 1623370770 |
| .long 1011049453 |
| .long 1617004845 |
| .long 282120 |
| .long 3667985273 |
| .long 1013894369 |
| .long 3049340112 |
| .long 296610 |
| .long 3145379760 |
| .long 1014403278 |
| .long 3577096743 |
| .long 311258 |
| .long 2603100681 |
| .long 1017152460 |
| .long 1990012070 |
| .long 326066 |
| .long 3249202951 |
| .long 1017448880 |
| .long 1453150081 |
| .long 341035 |
| .long 419288974 |
| .long 1016280325 |
| .long 917841882 |
| .long 356167 |
| .long 3793507337 |
| .long 1016095713 |
| .long 3712504873 |
| .long 371463 |
| .long 728023093 |
| .long 1016345318 |
| .long 363667784 |
| .long 386927 |
| .long 2582678538 |
| .long 1017123460 |
| .long 2956612996 |
| .long 402558 |
| .long 7592966 |
| .long 1016721543 |
| .long 2186617380 |
| .long 418360 |
| .long 228611441 |
| .long 1016696141 |
| .long 1719614412 |
| .long 434334 |
| .long 2261665670 |
| .long 1017457593 |
| .long 1013258798 |
| .long 450482 |
| .long 544148907 |
| .long 1017323666 |
| .long 3907805043 |
| .long 466805 |
| .long 2383914918 |
| .long 1017143586 |
| .long 1447192520 |
| .long 483307 |
| .long 1176412038 |
| .long 1017267372 |
| .long 1944781190 |
| .long 499988 |
| .long 2882956373 |
| .long 1013312481 |
| .long 919555682 |
| .long 516851 |
| .long 3154077648 |
| .long 1016528543 |
| .long 2571947538 |
| .long 533897 |
| .long 348651999 |
| .long 1016405780 |
| .long 2604962540 |
| .long 551129 |
| .long 3253791412 |
| .long 1015920431 |
| .long 1110089947 |
| .long 568549 |
| .long 1509121860 |
| .long 1014756995 |
| .long 2568320822 |
| .long 586158 |
| .long 2617649212 |
| .long 1017340090 |
| .long 2966275556 |
| .long 603959 |
| .long 553214634 |
| .long 1016457425 |
| .long 2682146383 |
| .long 621954 |
| .long 730975783 |
| .long 1014083580 |
| .long 2191782032 |
| .long 640145 |
| .long 1486499517 |
| .long 1016818996 |
| .long 2069751140 |
| .long 658534 |
| .long 2595788928 |
| .long 1016407932 |
| .long 2990417244 |
| .long 677123 |
| .long 1853053619 |
| .long 1015310724 |
| .long 1434058175 |
| .long 695915 |
| .long 2462790535 |
| .long 1015814775 |
| .long 2572866477 |
| .long 714911 |
| .long 3693944214 |
| .long 1017259110 |
| .long 3092190714 |
| .long 734114 |
| .long 2979333550 |
| .long 1017188654 |
| .long 4076559942 |
| .long 753526 |
| .long 174054861 |
| .long 1014300631 |
| .long 2420883922 |
| .long 773150 |
| .long 816778419 |
| .long 1014197934 |
| .long 3716502172 |
| .long 792987 |
| .long 3507050924 |
| .long 1015341199 |
| .long 777507147 |
| .long 813041 |
| .long 1821514088 |
| .long 1013410604 |
| .long 3706687593 |
| .long 833312 |
| .long 920623539 |
| .long 1016295433 |
| .long 1242007931 |
| .long 853805 |
| .long 2789017511 |
| .long 1014276997 |
| .long 3707479175 |
| .long 874520 |
| .long 3586233004 |
| .long 1015962192 |
| .long 64696965 |
| .long 895462 |
| .long 474650514 |
| .long 1016642419 |
| .long 863738718 |
| .long 916631 |
| .long 1614448851 |
| .long 1014281732 |
| .long 3884662774 |
| .long 938030 |
| .long 2450082086 |
| .long 1016164135 |
| .long 2728693977 |
| .long 959663 |
| .long 1101668360 |
| .long 1015989180 |
| .long 3999357479 |
| .long 981531 |
| .long 835814894 |
| .long 1015702697 |
| .long 1533953344 |
| .long 1003638 |
| .long 1301400989 |
| .long 1014466875 |
| .long 2174652632 |
| .long 1025985 |
| .type Tbl_addr,@object |
| .size Tbl_addr,1024 |
# All 128 bits set. The slow path of exp shifts this left to build a mask
# that clears low bits.
| .align 16 |
| ALLONES: |
| .long 4294967295 |
| .long 4294967295 |
| .long 4294967295 |
| .long 4294967295 |
| .type ALLONES,@object |
| .size ALLONES,16 |
# Bit pattern 0x3ff0000000000000 (the double 1.0) in both lanes. The slow
# path of exp adds it to a shifted exponent value to form a power of two.
| .align 16 |
| ebias: |
| .long 0 |
| .long 1072693248 |
| .long 0 |
| .long 1072693248 |
| .type ebias,@object |
| .size ebias,16 |
# Bit pattern 0x7fefffffffffffff, the largest finite double. exp squares it
# on the large-positive-argument path.
| .align 4 |
| XMAX: |
| .long 4294967295 |
| .long 2146435071 |
| .type XMAX,@object |
| .size XMAX,8 |
# Bit pattern 0x0010000000000000, the smallest positive normal double. exp
# squares it on the large-negative-argument path.
| .align 4 |
| XMIN: |
| .long 0 |
| .long 1048576 |
| .type XMIN,@object |
| .size XMIN,8 |
# Bit pattern 0x7ff0000000000000 (+Infinity), returned by exp for a +Inf argument.
| .align 4 |
| INF: |
| .long 0 |
| .long 2146435072 |
| .type INF,@object |
| .size INF,8 |
# The double +0.0, returned by exp for a -Inf argument.
| .align 4 |
| ZERO: |
| .long 0 |
| .long 0 |
| .type ZERO,@object |
| .size ZERO,8 |
# Bit pattern 0x3ff0000000000000 (the double 1.0). exp adds it to x when
# |x| is below the main-path range.
| .align 4 |
| ONE_val: |
| .long 0 |
| .long 1072693248 |
| .type ONE_val,@object |
| .size ONE_val,8 |
| .data |
| .section .note.GNU-stack, "" |
| // -- Begin DWARF2 SEGMENT .eh_frame |
# Hand-encoded unwind information for exp: one CIE followed by one FDE.
# The byte-level readings below follow the DWARF call-frame encoding.
# NOTE(review): decoded by hand from the constants - confirm with a dump
# tool (for example readelf --debug-dump=frames) before editing.
| .section .eh_frame,"a",@progbits |
| .eh_frame_seg: |
| .align 1 |
# CIE: length 0x14, CIE id 0, version 1, augmentation string zR.
| .4byte 0x00000014 |
| .8byte 0x00527a0100000000 |
# Code alignment 1, data alignment -8, return-address register 16,
# one augmentation byte 0x1b, then def_cfa register 7 offset 8.
| .8byte 0x08070c1b01107801 |
# Return address saved at CFA-8 (bytes 0x90 0x01), then two nop bytes.
| .4byte 0x00000190 |
# FDE: length 0x1c, CIE pointer 0x1c, start of exp relative to this
# field, and the size of exp.
| .4byte 0x0000001c |
| .4byte 0x0000001c |
| .4byte ..___tag_value_exp.1-. |
| .4byte ..___tag_value_exp.5-..___tag_value_exp.1 |
# Augmentation length 0, then advance_loc4 to the point after subq.
| .2byte 0x0400 |
| .4byte ..___tag_value_exp.3-..___tag_value_exp.1 |
# CFA offset becomes 32 (return address plus the 24-byte frame).
| .2byte 0x200e |
# Advance_loc4 to the point after addq, where the CFA offset is 8 again.
| .byte 0x04 |
| .4byte ..___tag_value_exp.4-..___tag_value_exp.3 |
| .2byte 0x080e |
| .byte 0x00 |
| # End |