| /* |
| * Copyright (c) 2011, 2015, Oracle and/or its affiliates. All rights reserved. |
| * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| * |
| * This code is free software; you can redistribute it and/or modify it |
| * under the terms of the GNU General Public License version 2 only, as |
| * published by the Free Software Foundation. |
| * |
| * This code is distributed in the hope that it will be useful, but WITHOUT |
| * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| * version 2 for more details (a copy is included in the LICENSE file that |
| * accompanied this code). |
| * |
| * You should have received a copy of the GNU General Public License version |
| * 2 along with this work; if not, write to the Free Software Foundation, |
| * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| * |
| * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
| * or visit www.oracle.com if you need additional information or have any |
| * questions. |
| */ |
| package org.graalvm.compiler.lir.amd64; |
| |
| import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.ILLEGAL; |
| import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.REG; |
| import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.STACK; |
| import static jdk.vm.ci.code.ValueUtil.asRegister; |
| |
| import org.graalvm.compiler.asm.Label; |
| import org.graalvm.compiler.asm.amd64.AMD64Address; |
| import org.graalvm.compiler.asm.amd64.AMD64Address.Scale; |
| import org.graalvm.compiler.asm.amd64.AMD64Assembler.ConditionFlag; |
| import org.graalvm.compiler.asm.amd64.AMD64MacroAssembler; |
| import org.graalvm.compiler.core.common.LIRKind; |
| import org.graalvm.compiler.debug.GraalError; |
| import org.graalvm.compiler.lir.LIRInstructionClass; |
| import org.graalvm.compiler.lir.Opcode; |
| import org.graalvm.compiler.lir.asm.ArrayDataPointerConstant; |
| import org.graalvm.compiler.lir.asm.CompilationResultBuilder; |
| import org.graalvm.compiler.lir.gen.LIRGeneratorTool; |
| |
| import jdk.vm.ci.amd64.AMD64; |
| import jdk.vm.ci.amd64.AMD64.CPUFeature; |
| import jdk.vm.ci.amd64.AMD64Kind; |
| import jdk.vm.ci.code.Register; |
| import jdk.vm.ci.meta.AllocatableValue; |
| import jdk.vm.ci.meta.Value; |
| |
| public final class AMD64MathIntrinsicUnaryOp extends AMD64LIRInstruction { |
| public static final LIRInstructionClass<AMD64MathIntrinsicUnaryOp> TYPE = LIRInstructionClass.create(AMD64MathIntrinsicUnaryOp.class); |
| |
| public enum UnaryIntrinsicOpcode { |
| LOG, |
| LOG10, |
| SIN, |
| COS, |
| TAN, |
| EXP |
| } |
| |
| @Opcode private final UnaryIntrinsicOpcode opcode; |
| @Def protected Value result; |
| @Use protected Value input; |
| @Temp({REG, ILLEGAL}) protected Value xmm1Temp = Value.ILLEGAL; |
| @Temp({REG, ILLEGAL}) protected Value xmm2Temp = Value.ILLEGAL; |
| @Temp({REG, ILLEGAL}) protected Value xmm3Temp = Value.ILLEGAL; |
| @Temp({REG, ILLEGAL}) protected Value xmm4Temp = Value.ILLEGAL; |
| @Temp({REG, ILLEGAL}) protected Value xmm5Temp = Value.ILLEGAL; |
| @Temp({REG, ILLEGAL}) protected Value xmm6Temp = Value.ILLEGAL; |
| @Temp({REG, ILLEGAL}) protected Value xmm7Temp = Value.ILLEGAL; |
| @Temp({REG, ILLEGAL}) protected Value xmm8Temp = Value.ILLEGAL; |
| @Temp({REG, ILLEGAL}) protected Value xmm9Temp = Value.ILLEGAL; |
| @Temp({REG, ILLEGAL}) protected Value xmm10Temp = Value.ILLEGAL; |
| @Temp({REG, ILLEGAL}) protected Value gpr1Temp = Value.ILLEGAL; |
| @Temp({REG, ILLEGAL}) protected Value gpr2Temp = Value.ILLEGAL; |
| @Temp protected AllocatableValue rcxTemp; |
| @Temp({REG, ILLEGAL}) protected Value gpr4Temp = Value.ILLEGAL; |
| @Temp({REG, ILLEGAL}) protected Value gpr5Temp = Value.ILLEGAL; |
| @Temp({REG, ILLEGAL}) protected Value gpr6Temp = Value.ILLEGAL; |
| @Temp({REG, ILLEGAL}) protected Value gpr7Temp = Value.ILLEGAL; |
| @Temp({REG, ILLEGAL}) protected Value gpr8Temp = Value.ILLEGAL; |
| @Temp({REG, ILLEGAL}) protected Value gpr9Temp = Value.ILLEGAL; |
| @Temp({REG, ILLEGAL}) protected Value gpr10Temp = Value.ILLEGAL; |
| @Temp({STACK, ILLEGAL}) protected Value stackTemp = Value.ILLEGAL; |
| |
| CompilationResultBuilder internalCrb; |
| |
| public AMD64MathIntrinsicUnaryOp(LIRGeneratorTool tool, UnaryIntrinsicOpcode opcode, Value result, Value input, Value stackTemp) { |
| super(TYPE); |
| this.opcode = opcode; |
| this.result = result; |
| this.input = input; |
| if (opcode == UnaryIntrinsicOpcode.LOG || opcode == UnaryIntrinsicOpcode.LOG10 || |
| opcode == UnaryIntrinsicOpcode.SIN || opcode == UnaryIntrinsicOpcode.COS || |
| opcode == UnaryIntrinsicOpcode.TAN || opcode == UnaryIntrinsicOpcode.EXP) { |
| this.gpr1Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD)); |
| this.gpr2Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD)); |
| this.rcxTemp = AMD64.rcx.asValue(LIRKind.value(AMD64Kind.QWORD)); |
| this.gpr4Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD)); |
| this.xmm1Temp = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE)); |
| this.xmm2Temp = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE)); |
| this.xmm3Temp = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE)); |
| this.xmm4Temp = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE)); |
| this.xmm5Temp = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE)); |
| this.xmm6Temp = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE)); |
| this.xmm7Temp = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE)); |
| |
| if (opcode == UnaryIntrinsicOpcode.EXP) { |
| this.gpr5Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD)); |
| this.xmm8Temp = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE)); |
| this.xmm9Temp = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE)); |
| this.xmm10Temp = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE)); |
| } |
| |
| if (opcode == UnaryIntrinsicOpcode.TAN) { |
| this.gpr5Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD)); |
| this.gpr6Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD)); |
| this.gpr7Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD)); |
| this.gpr8Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD)); |
| this.gpr9Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD)); |
| this.gpr10Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD)); |
| } |
| |
| if (opcode == UnaryIntrinsicOpcode.SIN || opcode == UnaryIntrinsicOpcode.COS) { |
| this.gpr5Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD)); |
| this.gpr6Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD)); |
| this.gpr7Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD)); |
| this.gpr8Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD)); |
| this.gpr9Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD)); |
| this.gpr10Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD)); |
| this.xmm8Temp = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE)); |
| this.xmm9Temp = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE)); |
| } |
| |
| this.stackTemp = stackTemp; |
| } |
| } |
| |
| public AMD64MathIntrinsicUnaryOp(LIRGeneratorTool tool, UnaryIntrinsicOpcode opcode, Value result, Value input) { |
| this(tool, opcode, result, input, Value.ILLEGAL); |
| } |
| |
| private void setCrb(CompilationResultBuilder crb) { |
| internalCrb = crb; |
| } |
| |
| private AMD64Address externalAddress(ArrayDataPointerConstant curPtr) { |
| return (AMD64Address) internalCrb.recordDataReferenceInCode(curPtr); |
| } |
| |
| @Override |
| public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) { |
| switch (opcode) { |
| case LOG: |
| logIntrinsic(asRegister(result, AMD64Kind.DOUBLE), asRegister(input, AMD64Kind.DOUBLE), crb, masm); |
| break; |
| case LOG10: |
| log10Intrinsic(asRegister(result, AMD64Kind.DOUBLE), asRegister(input, AMD64Kind.DOUBLE), crb, masm); |
| break; |
| case SIN: |
| sinIntrinsic(asRegister(result, AMD64Kind.DOUBLE), asRegister(input, AMD64Kind.DOUBLE), crb, masm); |
| break; |
| case COS: |
| cosIntrinsic(asRegister(result, AMD64Kind.DOUBLE), asRegister(input, AMD64Kind.DOUBLE), crb, masm); |
| break; |
| case TAN: |
| tanIntrinsic(asRegister(result, AMD64Kind.DOUBLE), asRegister(input, AMD64Kind.DOUBLE), crb, masm); |
| break; |
| case EXP: |
| expIntrinsic(asRegister(result, AMD64Kind.DOUBLE), asRegister(input, AMD64Kind.DOUBLE), crb, masm); |
| break; |
| default: |
| throw GraalError.shouldNotReachHere(); |
| } |
| } |
| |
| private static int[] logTwoTable = { |
| 0xfefa3800, 0x3fe62e42, 0x93c76730, 0x3d2ef357, 0xaa241800, |
| 0x3fe5ee82, 0x0cda46be, 0x3d220238, 0x5c364800, 0x3fe5af40, |
| 0xac10c9fb, 0x3d2dfa63, 0x26bb8c00, 0x3fe5707a, 0xff3303dd, |
| 0x3d09980b, 0x26867800, 0x3fe5322e, 0x5d257531, 0x3d05ccc4, |
| 0x835a5000, 0x3fe4f45a, 0x6d93b8fb, 0xbd2e6c51, 0x6f970c00, |
| 0x3fe4b6fd, 0xed4c541c, 0x3cef7115, 0x27e8a400, 0x3fe47a15, |
| 0xf94d60aa, 0xbd22cb6a, 0xf2f92400, 0x3fe43d9f, 0x481051f7, |
| 0xbcfd984f, 0x2125cc00, 0x3fe4019c, 0x30f0c74c, 0xbd26ce79, |
| 0x0c36c000, 0x3fe3c608, 0x7cfe13c2, 0xbd02b736, 0x17197800, |
| 0x3fe38ae2, 0xbb5569a4, 0xbd218b7a, 0xad9d8c00, 0x3fe35028, |
| 0x9527e6ac, 0x3d10b83f, 0x44340800, 0x3fe315da, 0xc5a0ed9c, |
| 0xbd274e93, 0x57b0e000, 0x3fe2dbf5, 0x07b9dc11, 0xbd17a6e5, |
| 0x6d0ec000, 0x3fe2a278, 0xe797882d, 0x3d206d2b, 0x1134dc00, |
| 0x3fe26962, 0x05226250, 0xbd0b61f1, 0xd8bebc00, 0x3fe230b0, |
| 0x6e48667b, 0x3d12fc06, 0x5fc61800, 0x3fe1f863, 0xc9fe81d3, |
| 0xbd2a7242, 0x49ae6000, 0x3fe1c078, 0xed70e667, 0x3cccacde, |
| 0x40f23c00, 0x3fe188ee, 0xf8ab4650, 0x3d14cc4e, 0xf6f29800, |
| 0x3fe151c3, 0xa293ae49, 0xbd2edd97, 0x23c75c00, 0x3fe11af8, |
| 0xbb9ddcb2, 0xbd258647, 0x8611cc00, 0x3fe0e489, 0x07801742, |
| 0x3d1c2998, 0xe2d05400, 0x3fe0ae76, 0x887e7e27, 0x3d1f486b, |
| 0x0533c400, 0x3fe078bf, 0x41edf5fd, 0x3d268122, 0xbe760400, |
| 0x3fe04360, 0xe79539e0, 0xbd04c45f, 0xe5b20800, 0x3fe00e5a, |
| 0xb1727b1c, 0xbd053ba3, 0xaf7a4800, 0x3fdfb358, 0x3c164935, |
| 0x3d0085fa, 0xee031800, 0x3fdf4aa7, 0x6f014a8b, 0x3d12cde5, |
| 0x56b41000, 0x3fdee2a1, 0x5a470251, 0x3d2f27f4, 0xc3ddb000, |
| 0x3fde7b42, 0x5372bd08, 0xbd246550, 0x1a272800, 0x3fde148a, |
| 0x07322938, 0xbd1326b2, 0x484c9800, 0x3fddae75, 0x60dc616a, |
| 0xbd1ea42d, 0x46def800, 0x3fdd4902, 0xe9a767a8, 0x3d235baf, |
| 0x18064800, 0x3fdce42f, 0x3ec7a6b0, 0xbd0797c3, 0xc7455800, |
| 0x3fdc7ff9, 0xc15249ae, 0xbd29b6dd, 0x693fa000, 0x3fdc1c60, |
| 0x7fe8e180, 0x3d2cec80, 0x1b80e000, 0x3fdbb961, 0xf40a666d, |
| 0x3d27d85b, 0x04462800, 0x3fdb56fa, 0x2d841995, 0x3d109525, |
| 0x5248d000, 0x3fdaf529, 0x52774458, 0xbd217cc5, 0x3c8ad800, |
| 0x3fda93ed, 0xbea77a5d, 0x3d1e36f2, 0x0224f800, 0x3fda3344, |
| 0x7f9d79f5, 0x3d23c645, 0xea15f000, 0x3fd9d32b, 0x10d0c0b0, |
| 0xbd26279e, 0x43135800, 0x3fd973a3, 0xa502d9f0, 0xbd152313, |
| 0x635bf800, 0x3fd914a8, 0x2ee6307d, 0xbd1766b5, 0xa88b3000, |
| 0x3fd8b639, 0xe5e70470, 0xbd205ae1, 0x776dc800, 0x3fd85855, |
| 0x3333778a, 0x3d2fd56f, 0x3bd81800, 0x3fd7fafa, 0xc812566a, |
| 0xbd272090, 0x687cf800, 0x3fd79e26, 0x2efd1778, 0x3d29ec7d, |
| 0x76c67800, 0x3fd741d8, 0x49dc60b3, 0x3d2d8b09, 0xe6af1800, |
| 0x3fd6e60e, 0x7c222d87, 0x3d172165, 0x3e9c6800, 0x3fd68ac8, |
| 0x2756eba0, 0x3d20a0d3, 0x0b3ab000, 0x3fd63003, 0xe731ae00, |
| 0xbd2db623, 0xdf596000, 0x3fd5d5bd, 0x08a465dc, 0xbd0a0b2a, |
| 0x53c8d000, 0x3fd57bf7, 0xee5d40ef, 0x3d1faded, 0x0738a000, |
| 0x3fd522ae, 0x8164c759, 0x3d2ebe70, 0x9e173000, 0x3fd4c9e0, |
| 0x1b0ad8a4, 0xbd2e2089, 0xc271c800, 0x3fd4718d, 0x0967d675, |
| 0xbd2f27ce, 0x23d5e800, 0x3fd419b4, 0xec90e09d, 0x3d08e436, |
| 0x77333000, 0x3fd3c252, 0xb606bd5c, 0x3d183b54, 0x76be1000, |
| 0x3fd36b67, 0xb0f177c8, 0x3d116ecd, 0xe1d36000, 0x3fd314f1, |
| 0xd3213cb8, 0xbd28e27a, 0x7cdc9000, 0x3fd2bef0, 0x4a5004f4, |
| 0x3d2a9cfa, 0x1134d800, 0x3fd26962, 0xdf5bb3b6, 0x3d2c93c1, |
| 0x6d0eb800, 0x3fd21445, 0xba46baea, 0x3d0a87de, 0x635a6800, |
| 0x3fd1bf99, 0x5147bdb7, 0x3d2ca6ed, 0xcbacf800, 0x3fd16b5c, |
| 0xf7a51681, 0x3d2b9acd, 0x8227e800, 0x3fd1178e, 0x63a5f01c, |
| 0xbd2c210e, 0x67616000, 0x3fd0c42d, 0x163ceae9, 0x3d27188b, |
| 0x604d5800, 0x3fd07138, 0x16ed4e91, 0x3cf89cdb, 0x5626c800, |
| 0x3fd01eae, 0x1485e94a, 0xbd16f08c, 0x6cb3b000, 0x3fcf991c, |
| 0xca0cdf30, 0x3d1bcbec, 0xe4dd0000, 0x3fcef5ad, 0x65bb8e11, |
| 0xbcca2115, 0xffe71000, 0x3fce530e, 0x6041f430, 0x3cc21227, |
| 0xb0d49000, 0x3fcdb13d, 0xf715b035, 0xbd2aff2a, 0xf2656000, |
| 0x3fcd1037, 0x75b6f6e4, 0xbd084a7e, 0xc6f01000, 0x3fcc6ffb, |
| 0xc5962bd2, 0xbcf1ec72, 0x383be000, 0x3fcbd087, 0x595412b6, |
| 0xbd2d4bc4, 0x575bd000, 0x3fcb31d8, 0x4eace1aa, 0xbd0c358d, |
| 0x3c8ae000, 0x3fca93ed, 0x50562169, 0xbd287243, 0x07089000, |
| 0x3fc9f6c4, 0x6865817a, 0x3d29904d, 0xdcf70000, 0x3fc95a5a, |
| 0x58a0ff6f, 0x3d07f228, 0xeb390000, 0x3fc8beaf, 0xaae92cd1, |
| 0xbd073d54, 0x6551a000, 0x3fc823c1, 0x9a631e83, 0x3d1e0ddb, |
| 0x85445000, 0x3fc7898d, 0x70914305, 0xbd1c6610, 0x8b757000, |
| 0x3fc6f012, 0xe59c21e1, 0xbd25118d, 0xbe8c1000, 0x3fc6574e, |
| 0x2c3c2e78, 0x3d19cf8b, 0x6b544000, 0x3fc5bf40, 0xeb68981c, |
| 0xbd127023, 0xe4a1b000, 0x3fc527e5, 0xe5697dc7, 0x3d2633e8, |
| 0x8333b000, 0x3fc4913d, 0x54fdb678, 0x3d258379, 0xa5993000, |
| 0x3fc3fb45, 0x7e6a354d, 0xbd2cd1d8, 0xb0159000, 0x3fc365fc, |
| 0x234b7289, 0x3cc62fa8, 0x0c868000, 0x3fc2d161, 0xcb81b4a1, |
| 0x3d039d6c, 0x2a49c000, 0x3fc23d71, 0x8fd3df5c, 0x3d100d23, |
| 0x7e23f000, 0x3fc1aa2b, 0x44389934, 0x3d2ca78e, 0x8227e000, |
| 0x3fc1178e, 0xce2d07f2, 0x3d21ef78, 0xb59e4000, 0x3fc08598, |
| 0x7009902c, 0xbd27e5dd, 0x39dbe000, 0x3fbfe891, 0x4fa10afd, |
| 0xbd2534d6, 0x830a2000, 0x3fbec739, 0xafe645e0, 0xbd2dc068, |
| 0x63844000, 0x3fbda727, 0x1fa71733, 0x3d1a8940, 0x01bc4000, |
| 0x3fbc8858, 0xc65aacd3, 0x3d2646d1, 0x8dad6000, 0x3fbb6ac8, |
| 0x2bf768e5, 0xbd139080, 0x40b1c000, 0x3fba4e76, 0xb94407c8, |
| 0xbd0e42b6, 0x5d594000, 0x3fb9335e, 0x3abd47da, 0x3d23115c, |
| 0x2f40e000, 0x3fb8197e, 0xf96ffdf7, 0x3d0f80dc, 0x0aeac000, |
| 0x3fb700d3, 0xa99ded32, 0x3cec1e8d, 0x4d97a000, 0x3fb5e95a, |
| 0x3c5d1d1e, 0xbd2c6906, 0x5d208000, 0x3fb4d311, 0x82f4e1ef, |
| 0xbcf53a25, 0xa7d1e000, 0x3fb3bdf5, 0xa5db4ed7, 0x3d2cc85e, |
| 0xa4472000, 0x3fb2aa04, 0xae9c697d, 0xbd20b6e8, 0xd1466000, |
| 0x3fb1973b, 0x560d9e9b, 0xbd25325d, 0xb59e4000, 0x3fb08598, |
| 0x7009902c, 0xbd17e5dd, 0xc006c000, 0x3faeea31, 0x4fc93b7b, |
| 0xbd0e113e, 0xcdddc000, 0x3faccb73, 0x47d82807, 0xbd1a68f2, |
| 0xd0fb0000, 0x3faaaef2, 0x353bb42e, 0x3d20fc1a, 0x149fc000, |
| 0x3fa894aa, 0xd05a267d, 0xbd197995, 0xf2d4c000, 0x3fa67c94, |
| 0xec19afa2, 0xbd029efb, 0xd42e0000, 0x3fa466ae, 0x75bdfd28, |
| 0xbd2c1673, 0x2f8d0000, 0x3fa252f3, 0xe021b67b, 0x3d283e9a, |
| 0x89e74000, 0x3fa0415d, 0x5cf1d753, 0x3d0111c0, 0xec148000, |
| 0x3f9c63d2, 0x3f9eb2f3, 0x3d2578c6, 0x28c90000, 0x3f984925, |
| 0x325a0c34, 0xbd2aa0ba, 0x25980000, 0x3f9432a9, 0x928637fe, |
| 0x3d098139, 0x58938000, 0x3f902056, 0x06e2f7d2, 0xbd23dc5b, |
| 0xa3890000, 0x3f882448, 0xda74f640, 0xbd275577, 0x75890000, |
| 0x3f801015, 0x999d2be8, 0xbd10c76b, 0x59580000, 0x3f700805, |
| 0xcb31c67b, 0x3d2166af, 0x00000000, 0x00000000, 0x00000000, |
| 0x80000000 |
| }; |
| |
| private static int[] logTwoData = { |
| 0xfefa3800, 0x3fa62e42, 0x93c76730, 0x3ceef357 |
| }; |
| |
| private static int[] coeffLogTwoData = { |
| 0x92492492, 0x3fc24924, 0x00000000, 0xbfd00000, 0x3d6fb175, |
| 0xbfc5555e, 0x55555555, 0x3fd55555, 0x9999999a, 0x3fc99999, |
| 0x00000000, 0xbfe00000 |
| }; |
| |
| /* |
| * Copyright (c) 2014, 2016, Intel Corporation. All rights reserved. Intel Math Library (LIBM) |
| * Source Code |
| * |
| * ALGORITHM DESCRIPTION - LOG() --------------------- |
| * |
| * x=2^k * mx, mx in [1,2) |
| * |
| * Get B~1/mx based on the output of rcpps instruction (B0) B = int((B0*2^7+0.5))/2^7 |
| * |
| * Reduced argument: r=B*mx-1.0 (computed accurately in high and low parts) |
| * |
| * Result: k*log(2) - log(B) + p(r) if |x-1| >= small value (2^-6) and p(r) is a degree 7 |
| * polynomial -log(B) read from data table (high, low parts) Result is formed from high and low |
| * parts. |
| * |
| * Special cases: log(NaN) = quiet NaN, and raise invalid exception log(+INF) = that INF log(0) |
| * = -INF with divide-by-zero exception raised log(1) = +0 log(x) = NaN with invalid exception |
| * raised if x < -0, including -INF |
| * |
| */ |
| |
| public void logIntrinsic(Register dest, Register value, CompilationResultBuilder crb, AMD64MacroAssembler masm) { |
| ArrayDataPointerConstant logTwoTablePtr = new ArrayDataPointerConstant(logTwoTable, 16); |
| ArrayDataPointerConstant logTwoDataPtr = new ArrayDataPointerConstant(logTwoData, 16); |
| ArrayDataPointerConstant coeffLogTwoDataPtr = new ArrayDataPointerConstant(coeffLogTwoData, 16); |
| |
| Label bb0 = new Label(); |
| Label bb1 = new Label(); |
| Label bb2 = new Label(); |
| Label bb3 = new Label(); |
| Label bb4 = new Label(); |
| Label bb5 = new Label(); |
| Label bb6 = new Label(); |
| Label bb7 = new Label(); |
| Label bb8 = new Label(); |
| |
| Register gpr1 = asRegister(gpr1Temp, AMD64Kind.QWORD); |
| Register gpr2 = asRegister(gpr2Temp, AMD64Kind.QWORD); |
| Register gpr3 = asRegister(rcxTemp, AMD64Kind.QWORD); |
| Register gpr4 = asRegister(gpr4Temp, AMD64Kind.QWORD); |
| |
| Register temp1 = asRegister(xmm1Temp, AMD64Kind.DOUBLE); |
| Register temp2 = asRegister(xmm2Temp, AMD64Kind.DOUBLE); |
| Register temp3 = asRegister(xmm3Temp, AMD64Kind.DOUBLE); |
| Register temp4 = asRegister(xmm4Temp, AMD64Kind.DOUBLE); |
| Register temp5 = asRegister(xmm5Temp, AMD64Kind.DOUBLE); |
| Register temp6 = asRegister(xmm6Temp, AMD64Kind.DOUBLE); |
| Register temp7 = asRegister(xmm7Temp, AMD64Kind.DOUBLE); |
| |
| AMD64Address stackSlot = (AMD64Address) crb.asAddress(stackTemp); |
| |
| setCrb(crb); |
| masm.movdq(stackSlot, value); |
| if (dest.encoding != value.encoding) { |
| masm.movdqu(dest, value); |
| } |
| masm.movq(gpr1, 0x3ff0000000000000L); |
| masm.movdq(temp2, gpr1); |
| masm.movq(gpr3, 0x77f0000000000000L); |
| masm.movdq(temp3, gpr3); |
| masm.movl(gpr2, 32768); |
| masm.movdl(temp4, gpr2); |
| masm.movq(gpr2, 0xffffe00000000000L); |
| masm.movdq(temp5, gpr2); |
| masm.movdqu(temp1, value); |
| masm.pextrw(gpr1, dest, 3); |
| masm.por(dest, temp2); |
| masm.movl(gpr2, 16352); |
| masm.psrlq(dest, 27); |
| masm.leaq(gpr4, externalAddress(logTwoTablePtr)); |
| masm.psrld(dest, 2); |
| masm.rcpps(dest, dest); |
| masm.psllq(temp1, 12); |
| masm.pshufd(temp6, temp5, 0xE4); |
| masm.psrlq(temp1, 12); |
| masm.subl(gpr1, 16); |
| masm.cmpl(gpr1, 32736); |
| masm.jcc(ConditionFlag.AboveEqual, bb0); |
| |
| masm.bind(bb1); |
| masm.paddd(dest, temp4); |
| masm.por(temp1, temp3); |
| masm.movdl(gpr3, dest); |
| masm.psllq(dest, 29); |
| masm.pand(temp5, temp1); |
| masm.pand(dest, temp6); |
| masm.subsd(temp1, temp5); |
| masm.mulpd(temp5, dest); |
| masm.andl(gpr1, 32752); |
| masm.subl(gpr1, gpr2); |
| masm.cvtsi2sdl(temp7, gpr1); |
| masm.mulsd(temp1, dest); |
| masm.movdq(temp6, externalAddress(logTwoDataPtr)); // 0xfefa3800, |
| // 0x3fa62e42 |
| masm.movdqu(temp3, externalAddress(coeffLogTwoDataPtr)); // 0x92492492, |
| // 0x3fc24924, |
| // 0x00000000, |
| // 0xbfd00000 |
| masm.subsd(temp5, temp2); |
| masm.andl(gpr3, 16711680); |
| masm.shrl(gpr3, 12); |
| masm.movdqu(dest, new AMD64Address(gpr4, gpr3, Scale.Times1, 0)); |
| masm.leaq(gpr4, externalAddress(coeffLogTwoDataPtr)); |
| masm.movdqu(temp4, new AMD64Address(gpr4, 16)); // 0x3d6fb175, |
| // 0xbfc5555e, |
| // 0x55555555, |
| // 0x3fd55555 |
| masm.addsd(temp1, temp5); |
| masm.movdqu(temp2, new AMD64Address(gpr4, 32)); // 0x9999999a, |
| // 0x3fc99999, |
| // 0x00000000, |
| // 0xbfe00000 |
| masm.mulsd(temp6, temp7); |
| if (masm.supports(CPUFeature.SSE3)) { |
| masm.movddup(temp5, temp1); |
| } else { |
| masm.movdqu(temp5, temp1); |
| masm.movlhps(temp5, temp5); |
| } |
| masm.leaq(gpr4, externalAddress(logTwoDataPtr)); |
| masm.mulsd(temp7, new AMD64Address(gpr4, 8)); // 0x93c76730, |
| // 0x3ceef357 |
| masm.mulsd(temp3, temp1); |
| masm.addsd(dest, temp6); |
| masm.mulpd(temp4, temp5); |
| masm.mulpd(temp5, temp5); |
| if (masm.supports(CPUFeature.SSE3)) { |
| masm.movddup(temp6, dest); |
| } else { |
| masm.movdqu(temp6, dest); |
| masm.movlhps(temp6, temp6); |
| } |
| masm.addsd(dest, temp1); |
| masm.addpd(temp4, temp2); |
| masm.mulpd(temp3, temp5); |
| masm.subsd(temp6, dest); |
| masm.mulsd(temp4, temp1); |
| masm.pshufd(temp2, dest, 0xEE); |
| masm.addsd(temp1, temp6); |
| masm.mulsd(temp5, temp5); |
| masm.addsd(temp7, temp2); |
| masm.addpd(temp4, temp3); |
| masm.addsd(temp1, temp7); |
| masm.mulpd(temp4, temp5); |
| masm.addsd(temp1, temp4); |
| masm.pshufd(temp5, temp4, 0xEE); |
| masm.addsd(temp1, temp5); |
| masm.addsd(dest, temp1); |
| masm.jmp(bb8); |
| |
| masm.bind(bb0); |
| masm.movdq(dest, stackSlot); |
| masm.movdq(temp1, stackSlot); |
| masm.addl(gpr1, 16); |
| masm.cmpl(gpr1, 32768); |
| masm.jcc(ConditionFlag.AboveEqual, bb2); |
| |
| masm.cmpl(gpr1, 16); |
| masm.jcc(ConditionFlag.Below, bb3); |
| |
| masm.bind(bb4); |
| masm.addsd(dest, dest); |
| masm.jmp(bb8); |
| |
| masm.bind(bb5); |
| masm.jcc(ConditionFlag.Above, bb4); |
| |
| masm.cmpl(gpr3, 0); |
| masm.jcc(ConditionFlag.Above, bb4); |
| |
| masm.jmp(bb6); |
| |
| masm.bind(bb3); |
| masm.xorpd(temp1, temp1); |
| masm.addsd(temp1, dest); |
| masm.movdl(gpr3, temp1); |
| masm.psrlq(temp1, 32); |
| masm.movdl(gpr2, temp1); |
| masm.orl(gpr3, gpr2); |
| masm.cmpl(gpr3, 0); |
| masm.jcc(ConditionFlag.Equal, bb7); |
| |
| masm.xorpd(temp1, temp1); |
| masm.movl(gpr1, 18416); |
| masm.pinsrw(temp1, gpr1, 3); |
| masm.mulsd(dest, temp1); |
| masm.movdqu(temp1, dest); |
| masm.pextrw(gpr1, dest, 3); |
| masm.por(dest, temp2); |
| masm.psrlq(dest, 27); |
| masm.movl(gpr2, 18416); |
| masm.psrld(dest, 2); |
| masm.rcpps(dest, dest); |
| masm.psllq(temp1, 12); |
| masm.pshufd(temp6, temp5, 0xE4); |
| masm.psrlq(temp1, 12); |
| masm.jmp(bb1); |
| |
| masm.bind(bb2); |
| masm.movdl(gpr3, temp1); |
| masm.psrlq(temp1, 32); |
| masm.movdl(gpr2, temp1); |
| masm.addl(gpr2, gpr2); |
| masm.cmpl(gpr2, -2097152); |
| masm.jcc(ConditionFlag.AboveEqual, bb5); |
| |
| masm.orl(gpr3, gpr2); |
| masm.cmpl(gpr3, 0); |
| masm.jcc(ConditionFlag.Equal, bb7); |
| |
| masm.bind(bb6); |
| masm.xorpd(temp1, temp1); |
| masm.xorpd(dest, dest); |
| masm.movl(gpr1, 32752); |
| masm.pinsrw(temp1, gpr1, 3); |
| masm.mulsd(dest, temp1); |
| masm.jmp(bb8); |
| |
| masm.bind(bb7); |
| masm.xorpd(temp1, temp1); |
| masm.xorpd(dest, dest); |
| masm.movl(gpr1, 49136); |
| masm.pinsrw(dest, gpr1, 3); |
| masm.divsd(dest, temp1); |
| |
| masm.bind(bb8); |
| } |
| |
| private static int[] highmaskLogTen = { |
| 0xf8000000, 0xffffffff, 0x00000000, 0xffffe000 |
| }; |
| |
| private static int[] logTenE = { |
| 0x00000000, 0x3fdbc000, 0xbf2e4108, 0x3f5a7a6c |
| }; |
| |
| private static int[] logTenTable = { |
| 0x509f7800, 0x3fd34413, 0x1f12b358, 0x3d1fef31, 0x80333400, |
| 0x3fd32418, 0xc671d9d0, 0xbcf542bf, 0x51195000, 0x3fd30442, |
| 0x78a4b0c3, 0x3d18216a, 0x6fc79400, 0x3fd2e490, 0x80fa389d, |
| 0xbc902869, 0x89d04000, 0x3fd2c502, 0x75c2f564, 0x3d040754, |
| 0x4ddd1c00, 0x3fd2a598, 0xd219b2c3, 0xbcfa1d84, 0x6baa7c00, |
| 0x3fd28651, 0xfd9abec1, 0x3d1be6d3, 0x94028800, 0x3fd2672d, |
| 0xe289a455, 0xbd1ede5e, 0x78b86400, 0x3fd2482c, 0x6734d179, |
| 0x3d1fe79b, 0xcca3c800, 0x3fd2294d, 0x981a40b8, 0xbced34ea, |
| 0x439c5000, 0x3fd20a91, 0xcc392737, 0xbd1a9cc3, 0x92752c00, |
| 0x3fd1ebf6, 0x03c9afe7, 0x3d1e98f8, 0x6ef8dc00, 0x3fd1cd7d, |
| 0x71dae7f4, 0x3d08a86c, 0x8fe4dc00, 0x3fd1af25, 0xee9185a1, |
| 0xbcff3412, 0xace59400, 0x3fd190ee, 0xc2cab353, 0x3cf17ed9, |
| 0x7e925000, 0x3fd172d8, 0x6952c1b2, 0x3cf1521c, 0xbe694400, |
| 0x3fd154e2, 0xcacb79ca, 0xbd0bdc78, 0x26cbac00, 0x3fd1370d, |
| 0xf71f4de1, 0xbd01f8be, 0x72fa0800, 0x3fd11957, 0x55bf910b, |
| 0x3c946e2b, 0x5f106000, 0x3fd0fbc1, 0x39e639c1, 0x3d14a84b, |
| 0xa802a800, 0x3fd0de4a, 0xd3f31d5d, 0xbd178385, 0x0b992000, |
| 0x3fd0c0f3, 0x3843106f, 0xbd1f602f, 0x486ce800, 0x3fd0a3ba, |
| 0x8819497c, 0x3cef987a, 0x1de49400, 0x3fd086a0, 0x1caa0467, |
| 0x3d0faec7, 0x4c30cc00, 0x3fd069a4, 0xa4424372, 0xbd1618fc, |
| 0x94490000, 0x3fd04cc6, 0x946517d2, 0xbd18384b, 0xb7e84000, |
| 0x3fd03006, 0xe0109c37, 0xbd19a6ac, 0x798a0c00, 0x3fd01364, |
| 0x5121e864, 0xbd164cf7, 0x38ce8000, 0x3fcfedbf, 0x46214d1a, |
| 0xbcbbc402, 0xc8e62000, 0x3fcfb4ef, 0xdab93203, 0x3d1e0176, |
| 0x2cb02800, 0x3fcf7c5a, 0x2a2ea8e4, 0xbcfec86a, 0xeeeaa000, |
| 0x3fcf43fd, 0xc18e49a4, 0x3cf110a8, 0x9bb6e800, 0x3fcf0bda, |
| 0x923cc9c0, 0xbd15ce99, 0xc093f000, 0x3fced3ef, 0x4d4b51e9, |
| 0x3d1a04c7, 0xec58f800, 0x3fce9c3c, 0x163cad59, 0x3cac8260, |
| 0x9a907000, 0x3fce2d7d, 0x3fa93646, 0x3ce4a1c0, 0x37311000, |
| 0x3fcdbf99, 0x32abd1fd, 0x3d07ea9d, 0x6744b800, 0x3fcd528c, |
| 0x4dcbdfd4, 0xbd1b08e2, 0xe36de800, 0x3fcce653, 0x0b7b7f7f, |
| 0xbd1b8f03, 0x77506800, 0x3fcc7aec, 0xa821c9fb, 0x3d13c163, |
| 0x00ff8800, 0x3fcc1053, 0x536bca76, 0xbd074ee5, 0x70719800, |
| 0x3fcba684, 0xd7da9b6b, 0xbd1fbf16, 0xc6f8d800, 0x3fcb3d7d, |
| 0xe2220bb3, 0x3d1a295d, 0x16c15800, 0x3fcad53c, 0xe724911e, |
| 0xbcf55822, 0x82533800, 0x3fca6dbc, 0x6d982371, 0x3cac567c, |
| 0x3c19e800, 0x3fca06fc, 0x84d17d80, 0x3d1da204, 0x85ef8000, |
| 0x3fc9a0f8, 0x54466a6a, 0xbd002204, 0xb0ac2000, 0x3fc93bae, |
| 0xd601fd65, 0x3d18840c, 0x1bb9b000, 0x3fc8d71c, 0x7bf58766, |
| 0xbd14f897, 0x34aae800, 0x3fc8733e, 0x3af6ac24, 0xbd0f5c45, |
| 0x76d68000, 0x3fc81012, 0x4303e1a1, 0xbd1f9a80, 0x6af57800, |
| 0x3fc7ad96, 0x43fbcb46, 0x3cf4c33e, 0xa6c51000, 0x3fc74bc7, |
| 0x70f0eac5, 0xbd192e3b, 0xccab9800, 0x3fc6eaa3, 0xc0093dfe, |
| 0xbd0faf15, 0x8b60b800, 0x3fc68a28, 0xde78d5fd, 0xbc9ea4ee, |
| 0x9d987000, 0x3fc62a53, 0x962bea6e, 0xbd194084, 0xc9b0e800, |
| 0x3fc5cb22, 0x888dd999, 0x3d1fe201, 0xe1634800, 0x3fc56c93, |
| 0x16ada7ad, 0x3d1b1188, 0xc176c000, 0x3fc50ea4, 0x4159b5b5, |
| 0xbcf09c08, 0x51766000, 0x3fc4b153, 0x84393d23, 0xbcf6a89c, |
| 0x83695000, 0x3fc4549d, 0x9f0b8bbb, 0x3d1c4b8c, 0x538d5800, |
| 0x3fc3f881, 0xf49df747, 0x3cf89b99, 0xc8138000, 0x3fc39cfc, |
| 0xd503b834, 0xbd13b99f, 0xf0df0800, 0x3fc3420d, 0xf011b386, |
| 0xbd05d8be, 0xe7466800, 0x3fc2e7b2, 0xf39c7bc2, 0xbd1bb94e, |
| 0xcdd62800, 0x3fc28de9, 0x05e6d69b, 0xbd10ed05, 0xd015d800, |
| 0x3fc234b0, 0xe29b6c9d, 0xbd1ff967, 0x224ea800, 0x3fc1dc06, |
| 0x727711fc, 0xbcffb30d, 0x01540000, 0x3fc183e8, 0x39786c5a, |
| 0x3cc23f57, 0xb24d9800, 0x3fc12c54, 0xc905a342, 0x3d003a1d, |
| 0x82835800, 0x3fc0d54a, 0x9b9920c0, 0x3d03b25a, 0xc72ac000, |
| 0x3fc07ec7, 0x46f26a24, 0x3cf0fa41, 0xdd35d800, 0x3fc028ca, |
| 0x41d9d6dc, 0x3d034a65, 0x52474000, 0x3fbfa6a4, 0x44f66449, |
| 0x3d19cad3, 0x2da3d000, 0x3fbefcb8, 0x67832999, 0x3d18400f, |
| 0x32a10000, 0x3fbe53ce, 0x9c0e3b1a, 0xbcff62fd, 0x556b7000, |
| 0x3fbdabe3, 0x02976913, 0xbcf8243b, 0x97e88000, 0x3fbd04f4, |
| 0xec793797, 0x3d1c0578, 0x09647000, 0x3fbc5eff, 0x05fc0565, |
| 0xbd1d799e, 0xc6426000, 0x3fbbb9ff, 0x4625f5ed, 0x3d1f5723, |
| 0xf7afd000, 0x3fbb15f3, 0xdd5aae61, 0xbd1a7e1e, 0xd358b000, |
| 0x3fba72d8, 0x3314e4d3, 0x3d17bc91, 0x9b1f5000, 0x3fb9d0ab, |
| 0x9a4d514b, 0x3cf18c9b, 0x9cd4e000, 0x3fb92f69, 0x7e4496ab, |
| 0x3cf1f96d, 0x31f4f000, 0x3fb88f10, 0xf56479e7, 0x3d165818, |
| 0xbf628000, 0x3fb7ef9c, 0x26bf486d, 0xbd1113a6, 0xb526b000, |
| 0x3fb7510c, 0x1a1c3384, 0x3ca9898d, 0x8e31e000, 0x3fb6b35d, |
| 0xb3875361, 0xbd0661ac, 0xd01de000, 0x3fb6168c, 0x2a7cacfa, |
| 0xbd1bdf10, 0x0af23000, 0x3fb57a98, 0xff868816, 0x3cf046d0, |
| 0xd8ea0000, 0x3fb4df7c, 0x1515fbe7, 0xbd1fd529, 0xde3b2000, |
| 0x3fb44538, 0x6e59a132, 0x3d1faeee, 0xc8df9000, 0x3fb3abc9, |
| 0xf1322361, 0xbd198807, 0x505f1000, 0x3fb3132d, 0x0888e6ab, |
| 0x3d1e5380, 0x359bd000, 0x3fb27b61, 0xdfbcbb22, 0xbcfe2724, |
| 0x429ee000, 0x3fb1e463, 0x6eb4c58c, 0xbcfe4dd6, 0x4a673000, |
| 0x3fb14e31, 0x4ce1ac9b, 0x3d1ba691, 0x28b96000, 0x3fb0b8c9, |
| 0x8c7813b8, 0xbd0b3872, 0xc1f08000, 0x3fb02428, 0xc2bc8c2c, |
| 0x3cb5ea6b, 0x05a1a000, 0x3faf209c, 0x72e8f18e, 0xbce8df84, |
| 0xc0b5e000, 0x3fadfa6d, 0x9fdef436, 0x3d087364, 0xaf416000, |
| 0x3facd5c2, 0x1068c3a9, 0x3d0827e7, 0xdb356000, 0x3fabb296, |
| 0x120a34d3, 0x3d101a9f, 0x5dfea000, 0x3faa90e6, 0xdaded264, |
| 0xbd14c392, 0x6034c000, 0x3fa970ad, 0x1c9d06a9, 0xbd1b705e, |
| 0x194c6000, 0x3fa851e8, 0x83996ad9, 0xbd0117bc, 0xcf4ac000, |
| 0x3fa73492, 0xb1a94a62, 0xbca5ea42, 0xd67b4000, 0x3fa618a9, |
| 0x75aed8ca, 0xbd07119b, 0x9126c000, 0x3fa4fe29, 0x5291d533, |
| 0x3d12658f, 0x6f4d4000, 0x3fa3e50e, 0xcd2c5cd9, 0x3d1d5c70, |
| 0xee608000, 0x3fa2cd54, 0xd1008489, 0x3d1a4802, 0x9900e000, |
| 0x3fa1b6f9, 0x54fb5598, 0xbd16593f, 0x06bb6000, 0x3fa0a1f9, |
| 0x64ef57b4, 0xbd17636b, 0xb7940000, 0x3f9f1c9f, 0xee6a4737, |
| 0x3cb5d479, 0x91aa0000, 0x3f9cf7f5, 0x3a16373c, 0x3d087114, |
| 0x156b8000, 0x3f9ad5ed, 0x836c554a, 0x3c6900b0, 0xd4764000, |
| 0x3f98b67f, 0xed12f17b, 0xbcffc974, 0x77dec000, 0x3f9699a7, |
| 0x232ce7ea, 0x3d1e35bb, 0xbfbf4000, 0x3f947f5d, 0xd84ffa6e, |
| 0x3d0e0a49, 0x82c7c000, 0x3f92679c, 0x8d170e90, 0xbd14d9f2, |
| 0xadd20000, 0x3f90525d, 0x86d9f88e, 0x3cdeb986, 0x86f10000, |
| 0x3f8c7f36, 0xb9e0a517, 0x3ce29faa, 0xb75c8000, 0x3f885e9e, |
| 0x542568cb, 0xbd1f7bdb, 0x46b30000, 0x3f8442e8, 0xb954e7d9, |
| 0x3d1e5287, 0xb7e60000, 0x3f802c07, 0x22da0b17, 0xbd19fb27, |
| 0x6c8b0000, 0x3f7833e3, 0x821271ef, 0xbd190f96, 0x29910000, |
| 0x3f701936, 0xbc3491a5, 0xbd1bcf45, 0x354a0000, 0x3f600fe3, |
| 0xc0ff520a, 0xbd19d71c, 0x00000000, 0x00000000, 0x00000000, |
| 0x00000000 |
| }; |
| |
| private static int[] logTwoLogTenData = { |
| 0x509f7800, 0x3f934413, 0x1f12b358, 0x3cdfef31 |
| }; |
| |
| private static int[] coeffLogTenData = { |
| 0xc1a5f12e, 0x40358874, 0x64d4ef0d, 0xc0089309, 0x385593b1, |
| 0xc025c917, 0xdc963467, 0x3ffc6a02, 0x7f9d3aa1, 0x4016ab9f, |
| 0xdc77b115, 0xbff27af2 |
| }; |
| |
| /* |
| * Copyright (c) 2014, 2016, Intel Corporation. All rights reserved. Intel Math Library (LIBM) |
| * Source Code |
| * |
| * ALGORITHM DESCRIPTION - LOG10() --------------------- |
| * |
| * Let x=2^k * mx, mx in [1,2) |
| * |
| * Get B~1/mx based on the output of rcpss instruction (B0) B = int((B0*LH*2^7+0.5))/2^7 LH is a |
| * short approximation for log10(e) |
| * |
| * Reduced argument: r=B*mx-LH (computed accurately in high and low parts) |
| * |
| * Result: k*log10(2) - log(B) + p(r) p(r) is a degree 7 polynomial -log(B) read from data table |
| * (high, low parts) Result is formed from high and low parts |
| * |
| * Special cases: log10(0) = -INF with divide-by-zero exception raised log10(1) = +0 log10(x) = |
| * NaN with invalid exception raised if x < -0, including -INF log10(+INF) = +INF |
| * |
| */ |
| |
| public void log10Intrinsic(Register dest, Register value, CompilationResultBuilder crb, AMD64MacroAssembler masm) { |
| ArrayDataPointerConstant highmaskLogTenPtr = new ArrayDataPointerConstant(highmaskLogTen, 16); |
| ArrayDataPointerConstant logTenEPtr = new ArrayDataPointerConstant(logTenE, 16); |
| ArrayDataPointerConstant logTenTablePtr = new ArrayDataPointerConstant(logTenTable, 16); |
| ArrayDataPointerConstant logTwoLogTenDataPtr = new ArrayDataPointerConstant(logTwoLogTenData, 16); |
| ArrayDataPointerConstant coeffLogTenDataPtr = new ArrayDataPointerConstant(coeffLogTenData, 16); |
| |
| Label bb0 = new Label(); |
| Label bb1 = new Label(); |
| Label bb2 = new Label(); |
| Label bb3 = new Label(); |
| Label bb4 = new Label(); |
| Label bb5 = new Label(); |
| Label bb6 = new Label(); |
| Label bb7 = new Label(); |
| Label bb8 = new Label(); |
| |
| Register gpr1 = asRegister(gpr1Temp, AMD64Kind.QWORD); |
| Register gpr2 = asRegister(gpr2Temp, AMD64Kind.QWORD); |
| Register gpr3 = asRegister(rcxTemp, AMD64Kind.QWORD); |
| Register gpr4 = asRegister(gpr4Temp, AMD64Kind.QWORD); |
| |
| Register temp1 = asRegister(xmm1Temp, AMD64Kind.DOUBLE); |
| Register temp2 = asRegister(xmm2Temp, AMD64Kind.DOUBLE); |
| Register temp3 = asRegister(xmm3Temp, AMD64Kind.DOUBLE); |
| Register temp4 = asRegister(xmm4Temp, AMD64Kind.DOUBLE); |
| Register temp5 = asRegister(xmm5Temp, AMD64Kind.DOUBLE); |
| Register temp6 = asRegister(xmm6Temp, AMD64Kind.DOUBLE); |
| Register temp7 = asRegister(xmm7Temp, AMD64Kind.DOUBLE); |
| |
| AMD64Address stackSlot = (AMD64Address) crb.asAddress(stackTemp); |
| |
| setCrb(crb); |
| masm.movdq(stackSlot, value); |
| if (dest.encoding != value.encoding) { |
| masm.movdqu(dest, value); |
| } |
| masm.movdqu(temp5, externalAddress(highmaskLogTenPtr)); // 0xf8000000, |
| // 0xffffffff, |
| // 0x00000000, |
| // 0xffffe000 |
| masm.xorpd(temp2, temp2); |
| masm.movl(gpr1, 16368); |
| masm.pinsrw(temp2, gpr1, 3); |
| masm.movl(gpr2, 1054736384); |
| masm.movdl(temp7, gpr2); |
| masm.xorpd(temp3, temp3); |
| masm.movl(gpr3, 30704); |
| masm.pinsrw(temp3, gpr3, 3); |
| masm.movl(gpr3, 32768); |
| masm.movdl(temp4, gpr3); |
| masm.movdqu(temp1, value); |
| masm.pextrw(gpr1, dest, 3); |
| masm.por(dest, temp2); |
| masm.movl(gpr2, 16352); |
| masm.psrlq(dest, 27); |
| masm.movdqu(temp2, externalAddress(logTenEPtr)); // 0x00000000, |
| // 0x3fdbc000, |
| // 0xbf2e4108, |
| // 0x3f5a7a6c |
| masm.psrld(dest, 2); |
| masm.rcpps(dest, dest); |
| masm.psllq(temp1, 12); |
| masm.pshufd(temp6, temp5, 0x4E); |
| masm.psrlq(temp1, 12); |
| masm.subl(gpr1, 16); |
| masm.cmpl(gpr1, 32736); |
| masm.jcc(ConditionFlag.AboveEqual, bb0); |
| |
| masm.bind(bb1); |
| masm.mulss(dest, temp7); |
| masm.por(temp1, temp3); |
| masm.andpd(temp5, temp1); |
| masm.paddd(dest, temp4); |
| masm.movdqu(temp3, externalAddress(coeffLogTenDataPtr)); // 0xc1a5f12e, |
| // 0x40358874, |
| // 0x64d4ef0d, |
| // 0xc0089309 |
| masm.leaq(gpr4, externalAddress(coeffLogTenDataPtr)); |
| masm.movdqu(temp4, new AMD64Address(gpr4, 16)); // 0x385593b1, |
| // 0xc025c917, |
| // 0xdc963467, |
| // 0x3ffc6a02 |
| masm.subsd(temp1, temp5); |
| masm.movdl(gpr3, dest); |
| masm.psllq(dest, 29); |
| masm.andpd(dest, temp6); |
| masm.movdq(temp6, externalAddress(logTwoLogTenDataPtr)); // 0x509f7800, |
| // 0x3f934413 |
| masm.andl(gpr1, 32752); |
| masm.subl(gpr1, gpr2); |
| masm.cvtsi2sdl(temp7, gpr1); |
| masm.mulpd(temp5, dest); |
| masm.mulsd(temp1, dest); |
| masm.subsd(temp5, temp2); |
| masm.movdqu(temp2, new AMD64Address(gpr4, 32)); // 0x7f9d3aa1, |
| // 0x4016ab9f, |
| // 0xdc77b115, |
| // 0xbff27af2 |
| masm.leaq(gpr4, externalAddress(logTenTablePtr)); |
| masm.andl(gpr3, 16711680); |
| masm.shrl(gpr3, 12); |
| masm.movdqu(dest, new AMD64Address(gpr4, gpr3, Scale.Times1, -1504)); |
| masm.addsd(temp1, temp5); |
| masm.mulsd(temp6, temp7); |
| masm.pshufd(temp5, temp1, 0x44); |
| masm.leaq(gpr4, externalAddress(logTwoLogTenDataPtr)); |
| masm.mulsd(temp7, new AMD64Address(gpr4, 8)); // 0x1f12b358, |
| // 0x3cdfef31 |
| masm.mulsd(temp3, temp1); |
| masm.addsd(dest, temp6); |
| masm.mulpd(temp4, temp5); |
| masm.leaq(gpr4, externalAddress(logTenEPtr)); |
| masm.movdq(temp6, new AMD64Address(gpr4, 8)); // 0xbf2e4108, |
| // 0x3f5a7a6c |
| masm.mulpd(temp5, temp5); |
| masm.addpd(temp4, temp2); |
| masm.mulpd(temp3, temp5); |
| masm.pshufd(temp2, dest, 0xE4); |
| masm.addsd(dest, temp1); |
| masm.mulsd(temp4, temp1); |
| masm.subsd(temp2, dest); |
| masm.mulsd(temp6, temp1); |
| masm.addsd(temp1, temp2); |
| masm.pshufd(temp2, dest, 0xEE); |
| masm.mulsd(temp5, temp5); |
| masm.addsd(temp7, temp2); |
| masm.addsd(temp1, temp6); |
| masm.addpd(temp4, temp3); |
| masm.addsd(temp1, temp7); |
| masm.mulpd(temp4, temp5); |
| masm.addsd(temp1, temp4); |
| masm.pshufd(temp5, temp4, 0xEE); |
| masm.addsd(temp1, temp5); |
| masm.addsd(dest, temp1); |
| masm.jmp(bb8); |
| |
| masm.bind(bb0); |
| masm.movdq(dest, stackSlot); |
| masm.movdq(temp1, stackSlot); |
| masm.addl(gpr1, 16); |
| masm.cmpl(gpr1, 32768); |
| masm.jcc(ConditionFlag.AboveEqual, bb2); |
| |
| masm.cmpl(gpr1, 16); |
| masm.jcc(ConditionFlag.Below, bb3); |
| |
| masm.bind(bb4); |
| masm.addsd(dest, dest); |
| masm.jmp(bb8); |
| |
| masm.bind(bb5); |
| masm.jcc(ConditionFlag.Above, bb4); |
| |
| masm.cmpl(gpr3, 0); |
| masm.jcc(ConditionFlag.Above, bb4); |
| |
| masm.jmp(bb6); |
| |
| masm.bind(bb3); |
| masm.xorpd(temp1, temp1); |
| masm.addsd(temp1, dest); |
| masm.movdl(gpr3, temp1); |
| masm.psrlq(temp1, 32); |
| masm.movdl(gpr2, temp1); |
| masm.orl(gpr3, gpr2); |
| masm.cmpl(gpr3, 0); |
| masm.jcc(ConditionFlag.Equal, bb7); |
| |
| masm.xorpd(temp1, temp1); |
| masm.xorpd(temp2, temp2); |
| masm.movl(gpr1, 18416); |
| masm.pinsrw(temp1, gpr1, 3); |
| masm.mulsd(dest, temp1); |
| masm.movl(gpr1, 16368); |
| masm.pinsrw(temp2, gpr1, 3); |
| masm.movdqu(temp1, dest); |
| masm.pextrw(gpr1, dest, 3); |
| masm.por(dest, temp2); |
| masm.movl(gpr2, 18416); |
| masm.psrlq(dest, 27); |
| masm.movdqu(temp2, externalAddress(logTenEPtr)); // 0x00000000, |
| // 0x3fdbc000, |
| // 0xbf2e4108, |
| // 0x3f5a7a6c |
| masm.psrld(dest, 2); |
| masm.rcpps(dest, dest); |
| masm.psllq(temp1, 12); |
| masm.pshufd(temp6, temp5, 0x4E); |
| masm.psrlq(temp1, 12); |
| masm.jmp(bb1); |
| |
| masm.bind(bb2); |
| masm.movdl(gpr3, temp1); |
| masm.psrlq(temp1, 32); |
| masm.movdl(gpr2, temp1); |
| masm.addl(gpr2, gpr2); |
| masm.cmpl(gpr2, -2097152); |
| masm.jcc(ConditionFlag.AboveEqual, bb5); |
| |
| masm.orl(gpr3, gpr2); |
| masm.cmpl(gpr3, 0); |
| masm.jcc(ConditionFlag.Equal, bb7); |
| |
| masm.bind(bb6); |
| masm.xorpd(temp1, temp1); |
| masm.xorpd(dest, dest); |
| masm.movl(gpr1, 32752); |
| masm.pinsrw(temp1, gpr1, 3); |
| masm.mulsd(dest, temp1); |
| masm.jmp(bb8); |
| |
| masm.bind(bb7); |
| masm.xorpd(temp1, temp1); |
| masm.xorpd(dest, dest); |
| masm.movl(gpr1, 49136); |
| masm.pinsrw(dest, gpr1, 3); |
| masm.divsd(dest, temp1); |
| |
| masm.bind(bb8); |
| } |
| |
| /* |
| * Copyright (c) 2014, 2016, Intel Corporation. All rights reserved. Intel Math Library (LIBM) |
| * Source Code |
| * |
| * ALGORITHM DESCRIPTION - SIN() --------------------- |
| * |
| * 1. RANGE REDUCTION |
| * |
| * We perform an initial range reduction from X to r with |
| * |
| * X =~= N * pi/32 + r |
| * |
| * so that |r| <= pi/64 + epsilon. We restrict inputs to those where |N| <= 932560. Beyond this, |
| * the range reduction is insufficiently accurate. For extremely small inputs, denormalization |
| * can occur internally, impacting performance. This means that the main path is actually only |
| * taken for 2^-252 <= |X| < 90112. |
| * |
| * To avoid branches, we perform the range reduction to full accuracy each time. |
| * |
| * X - N * (P_1 + P_2 + P_3) |
| * |
| * where P_1 and P_2 are 32-bit numbers (so multiplication by N is exact) and P_3 is a 53-bit |
| * number. Together, these approximate pi well enough for all cases in the restricted range. |
| * |
| * The main reduction sequence is: |
| * |
| * y = 32/pi * x N = integer(y) (computed by adding and subtracting off SHIFTER) |
| * |
| * m_1 = N * P_1 m_2 = N * P_2 r_1 = x - m_1 r = r_1 - m_2 (this r can be used for most of the |
| * calculation) |
| * |
| * c_1 = r_1 - r m_3 = N * P_3 c_2 = c_1 - m_2 c = c_2 - m_3 |
| * |
| * 2. MAIN ALGORITHM |
| * |
| * The algorithm uses a table lookup based on B = M * pi / 32 where M = N mod 64. The stored |
| * values are: sigma closest power of 2 to cos(B) C_hl 53-bit cos(B) - sigma S_hi + S_lo 2 * |
| * 53-bit sin(B) |
| * |
| * The computation is organized as follows: |
| * |
| * sin(B + r + c) = [sin(B) + sigma * r] + r * (cos(B) - sigma) + sin(B) * [cos(r + c) - 1] + |
| * cos(B) * [sin(r + c) - r] |
| * |
| * which is approximately: |
| * |
| * [S_hi + sigma * r] + C_hl * r + S_lo + S_hi * [(cos(r) - 1) - r * c] + (C_hl + sigma) * |
| * [(sin(r) - r) + c] |
| * |
| * and this is what is actually computed. We separate this sum into four parts: |
| * |
| * hi + med + pols + corr |
| * |
| * where |
| * |
| * hi = S_hi + sigma r med = C_hl * r pols = S_hi * (cos(r) - 1) + (C_hl + sigma) * (sin(r) - r) |
| * corr = S_lo + c * ((C_hl + sigma) - S_hi * r) |
| * |
| * 3. POLYNOMIAL |
| * |
| * The polynomial S_hi * (cos(r) - 1) + (C_hl + sigma) * (sin(r) - r) can be rearranged freely, |
| * since it is quite small, so we exploit parallelism to the fullest. |
| * |
| * psc4 = SC_4 * r_1 msc4 = psc4 * r r2 = r * r msc2 = SC_2 * r2 r4 = r2 * r2 psc3 = SC_3 + msc4 |
| * psc1 = SC_1 + msc2 msc3 = r4 * psc3 sincospols = psc1 + msc3 pols = sincospols * <S_hi * r^2 |
| * | (C_hl + sigma) * r^3> |
| * |
| * 4. CORRECTION TERM |
| * |
| * This is where the "c" component of the range reduction is taken into account; recall that |
| * just "r" is used for most of the calculation. |
| * |
| * -c = m_3 - c_2 -d = S_hi * r - (C_hl + sigma) corr = -c * -d + S_lo |
| * |
| * 5. COMPENSATED SUMMATIONS |
| * |
| * The two successive compensated summations add up the high and medium parts, leaving just the |
| * low parts to add up at the end. |
| * |
| * rs = sigma * r res_int = S_hi + rs k_0 = S_hi - res_int k_2 = k_0 + rs med = C_hl * r res_hi |
| * = res_int + med k_1 = res_int - res_hi k_3 = k_1 + med |
| * |
| * 6. FINAL SUMMATION |
| * |
| * We now add up all the small parts: |
| * |
| * res_lo = pols(hi) + pols(lo) + corr + k_1 + k_3 |
| * |
| * Now the overall result is just: |
| * |
| * res_hi + res_lo |
| * |
| * 7. SMALL ARGUMENTS |
| * |
| * If |x| < SNN (SNN meaning the smallest normal number), we simply perform 0.1111111 cdots 1111 |
| * * x. For SNN <= |x|, we do 2^-55 * (2^55 * x - x). |
| * |
| * Special cases: sin(NaN) = quiet NaN, and raise invalid exception sin(INF) = NaN and raise |
| * invalid exception sin(+/-0) = +/-0 |
| * |
| */ |
| |
| public int[] oneHalf = { |
| 0x00000000, 0x3fe00000, 0x00000000, 0x3fe00000 |
| }; |
| |
| public int[] pTwo = { |
| 0x1a600000, 0x3d90b461, 0x1a600000, 0x3d90b461 |
| }; |
| |
| public int[] scFour = { |
| 0xa556c734, 0x3ec71de3, 0x1a01a01a, 0x3efa01a0 |
| }; |
| |
| public int[] cTable = { |
| 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, |
| 0x00000000, 0x00000000, 0x3ff00000, 0x176d6d31, 0xbf73b92e, |
| 0xbc29b42c, 0x3fb917a6, 0xe0000000, 0xbc3e2718, 0x00000000, |
| 0x3ff00000, 0x011469fb, 0xbf93ad06, 0x3c69a60b, 0x3fc8f8b8, |
| 0xc0000000, 0xbc626d19, 0x00000000, 0x3ff00000, 0x939d225a, |
| 0xbfa60bea, 0x2ed59f06, 0x3fd29406, 0xa0000000, 0xbc75d28d, |
| 0x00000000, 0x3ff00000, 0x866b95cf, 0xbfb37ca1, 0xa6aea963, |
| 0x3fd87de2, 0xe0000000, 0xbc672ced, 0x00000000, 0x3ff00000, |
| 0x73fa1279, 0xbfbe3a68, 0x3806f63b, 0x3fde2b5d, 0x20000000, |
| 0x3c5e0d89, 0x00000000, 0x3ff00000, 0x5bc57974, 0xbfc59267, |
| 0x39ae68c8, 0x3fe1c73b, 0x20000000, 0x3c8b25dd, 0x00000000, |
| 0x3ff00000, 0x53aba2fd, 0xbfcd0dfe, 0x25091dd6, 0x3fe44cf3, |
| 0x20000000, 0x3c68076a, 0x00000000, 0x3ff00000, 0x99fcef32, |
| 0x3fca8279, 0x667f3bcd, 0x3fe6a09e, 0x20000000, 0xbc8bdd34, |
| 0x00000000, 0x3fe00000, 0x94247758, 0x3fc133cc, 0x6b151741, |
| 0x3fe8bc80, 0x20000000, 0xbc82c5e1, 0x00000000, 0x3fe00000, |
| 0x9ae68c87, 0x3fac73b3, 0x290ea1a3, 0x3fea9b66, 0xe0000000, |
| 0x3c39f630, 0x00000000, 0x3fe00000, 0x7f909c4e, 0xbf9d4a2c, |
| 0xf180bdb1, 0x3fec38b2, 0x80000000, 0xbc76e0b1, 0x00000000, |
| 0x3fe00000, 0x65455a75, 0xbfbe0875, 0xcf328d46, 0x3fed906b, |
| 0x20000000, 0x3c7457e6, 0x00000000, 0x3fe00000, 0x76acf82d, |
| 0x3fa4a031, 0x56c62dda, 0x3fee9f41, 0xe0000000, 0x3c8760b1, |
| 0x00000000, 0x3fd00000, 0x0e5967d5, 0xbfac1d1f, 0xcff75cb0, |
| 0x3fef6297, 0x20000000, 0x3c756217, 0x00000000, 0x3fd00000, |
| 0x0f592f50, 0xbf9ba165, 0xa3d12526, 0x3fefd88d, 0x40000000, |
| 0xbc887df6, 0x00000000, 0x3fc00000, 0x00000000, 0x00000000, |
| 0x00000000, 0x3ff00000, 0x00000000, 0x00000000, 0x00000000, |
| 0x00000000, 0x0f592f50, 0x3f9ba165, 0xa3d12526, 0x3fefd88d, |
| 0x40000000, 0xbc887df6, 0x00000000, 0xbfc00000, 0x0e5967d5, |
| 0x3fac1d1f, 0xcff75cb0, 0x3fef6297, 0x20000000, 0x3c756217, |
| 0x00000000, 0xbfd00000, 0x76acf82d, 0xbfa4a031, 0x56c62dda, |
| 0x3fee9f41, 0xe0000000, 0x3c8760b1, 0x00000000, 0xbfd00000, |
| 0x65455a75, 0x3fbe0875, 0xcf328d46, 0x3fed906b, 0x20000000, |
| 0x3c7457e6, 0x00000000, 0xbfe00000, 0x7f909c4e, 0x3f9d4a2c, |
| 0xf180bdb1, 0x3fec38b2, 0x80000000, 0xbc76e0b1, 0x00000000, |
| 0xbfe00000, 0x9ae68c87, 0xbfac73b3, 0x290ea1a3, 0x3fea9b66, |
| 0xe0000000, 0x3c39f630, 0x00000000, 0xbfe00000, 0x94247758, |
| 0xbfc133cc, 0x6b151741, 0x3fe8bc80, 0x20000000, 0xbc82c5e1, |
| 0x00000000, 0xbfe00000, 0x99fcef32, 0xbfca8279, 0x667f3bcd, |
| 0x3fe6a09e, 0x20000000, 0xbc8bdd34, 0x00000000, 0xbfe00000, |
| 0x53aba2fd, 0x3fcd0dfe, 0x25091dd6, 0x3fe44cf3, 0x20000000, |
| 0x3c68076a, 0x00000000, 0xbff00000, 0x5bc57974, 0x3fc59267, |
| 0x39ae68c8, 0x3fe1c73b, 0x20000000, 0x3c8b25dd, 0x00000000, |
| 0xbff00000, 0x73fa1279, 0x3fbe3a68, 0x3806f63b, 0x3fde2b5d, |
| 0x20000000, 0x3c5e0d89, 0x00000000, 0xbff00000, 0x866b95cf, |
| 0x3fb37ca1, 0xa6aea963, 0x3fd87de2, 0xe0000000, 0xbc672ced, |
| 0x00000000, 0xbff00000, 0x939d225a, 0x3fa60bea, 0x2ed59f06, |
| 0x3fd29406, 0xa0000000, 0xbc75d28d, 0x00000000, 0xbff00000, |
| 0x011469fb, 0x3f93ad06, 0x3c69a60b, 0x3fc8f8b8, 0xc0000000, |
| 0xbc626d19, 0x00000000, 0xbff00000, 0x176d6d31, 0x3f73b92e, |
| 0xbc29b42c, 0x3fb917a6, 0xe0000000, 0xbc3e2718, 0x00000000, |
| 0xbff00000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, |
| 0x00000000, 0x00000000, 0x00000000, 0xbff00000, 0x176d6d31, |
| 0x3f73b92e, 0xbc29b42c, 0xbfb917a6, 0xe0000000, 0x3c3e2718, |
| 0x00000000, 0xbff00000, 0x011469fb, 0x3f93ad06, 0x3c69a60b, |
| 0xbfc8f8b8, 0xc0000000, 0x3c626d19, 0x00000000, 0xbff00000, |
| 0x939d225a, 0x3fa60bea, 0x2ed59f06, 0xbfd29406, 0xa0000000, |
| 0x3c75d28d, 0x00000000, 0xbff00000, 0x866b95cf, 0x3fb37ca1, |
| 0xa6aea963, 0xbfd87de2, 0xe0000000, 0x3c672ced, 0x00000000, |
| 0xbff00000, 0x73fa1279, 0x3fbe3a68, 0x3806f63b, 0xbfde2b5d, |
| 0x20000000, 0xbc5e0d89, 0x00000000, 0xbff00000, 0x5bc57974, |
| 0x3fc59267, 0x39ae68c8, 0xbfe1c73b, 0x20000000, 0xbc8b25dd, |
| 0x00000000, 0xbff00000, 0x53aba2fd, 0x3fcd0dfe, 0x25091dd6, |
| 0xbfe44cf3, 0x20000000, 0xbc68076a, 0x00000000, 0xbff00000, |
| 0x99fcef32, 0xbfca8279, 0x667f3bcd, 0xbfe6a09e, 0x20000000, |
| 0x3c8bdd34, 0x00000000, 0xbfe00000, 0x94247758, 0xbfc133cc, |
| 0x6b151741, 0xbfe8bc80, 0x20000000, 0x3c82c5e1, 0x00000000, |
| 0xbfe00000, 0x9ae68c87, 0xbfac73b3, 0x290ea1a3, 0xbfea9b66, |
| 0xe0000000, 0xbc39f630, 0x00000000, 0xbfe00000, 0x7f909c4e, |
| 0x3f9d4a2c, 0xf180bdb1, 0xbfec38b2, 0x80000000, 0x3c76e0b1, |
| 0x00000000, 0xbfe00000, 0x65455a75, 0x3fbe0875, 0xcf328d46, |
| 0xbfed906b, 0x20000000, 0xbc7457e6, 0x00000000, 0xbfe00000, |
| 0x76acf82d, 0xbfa4a031, 0x56c62dda, 0xbfee9f41, 0xe0000000, |
| 0xbc8760b1, 0x00000000, 0xbfd00000, 0x0e5967d5, 0x3fac1d1f, |
| 0xcff75cb0, 0xbfef6297, 0x20000000, 0xbc756217, 0x00000000, |
| 0xbfd00000, 0x0f592f50, 0x3f9ba165, 0xa3d12526, 0xbfefd88d, |
| 0x40000000, 0x3c887df6, 0x00000000, 0xbfc00000, 0x00000000, |
| 0x00000000, 0x00000000, 0xbff00000, 0x00000000, 0x00000000, |
| 0x00000000, 0x00000000, 0x0f592f50, 0xbf9ba165, 0xa3d12526, |
| 0xbfefd88d, 0x40000000, 0x3c887df6, 0x00000000, 0x3fc00000, |
| 0x0e5967d5, 0xbfac1d1f, 0xcff75cb0, 0xbfef6297, 0x20000000, |
| 0xbc756217, 0x00000000, 0x3fd00000, 0x76acf82d, 0x3fa4a031, |
| 0x56c62dda, 0xbfee9f41, 0xe0000000, 0xbc8760b1, 0x00000000, |
| 0x3fd00000, 0x65455a75, 0xbfbe0875, 0xcf328d46, 0xbfed906b, |
| 0x20000000, 0xbc7457e6, 0x00000000, 0x3fe00000, 0x7f909c4e, |
| 0xbf9d4a2c, 0xf180bdb1, 0xbfec38b2, 0x80000000, 0x3c76e0b1, |
| 0x00000000, 0x3fe00000, 0x9ae68c87, 0x3fac73b3, 0x290ea1a3, |
| 0xbfea9b66, 0xe0000000, 0xbc39f630, 0x00000000, 0x3fe00000, |
| 0x94247758, 0x3fc133cc, 0x6b151741, 0xbfe8bc80, 0x20000000, |
| 0x3c82c5e1, 0x00000000, 0x3fe00000, 0x99fcef32, 0x3fca8279, |
| 0x667f3bcd, 0xbfe6a09e, 0x20000000, 0x3c8bdd34, 0x00000000, |
| 0x3fe00000, 0x53aba2fd, 0xbfcd0dfe, 0x25091dd6, 0xbfe44cf3, |
| 0x20000000, 0xbc68076a, 0x00000000, 0x3ff00000, 0x5bc57974, |
| 0xbfc59267, 0x39ae68c8, 0xbfe1c73b, 0x20000000, 0xbc8b25dd, |
| 0x00000000, 0x3ff00000, 0x73fa1279, 0xbfbe3a68, 0x3806f63b, |
| 0xbfde2b5d, 0x20000000, 0xbc5e0d89, 0x00000000, 0x3ff00000, |
| 0x866b95cf, 0xbfb37ca1, 0xa6aea963, 0xbfd87de2, 0xe0000000, |
| 0x3c672ced, 0x00000000, 0x3ff00000, 0x939d225a, 0xbfa60bea, |
| 0x2ed59f06, 0xbfd29406, 0xa0000000, 0x3c75d28d, 0x00000000, |
| 0x3ff00000, 0x011469fb, 0xbf93ad06, 0x3c69a60b, 0xbfc8f8b8, |
| 0xc0000000, 0x3c626d19, 0x00000000, 0x3ff00000, 0x176d6d31, |
| 0xbf73b92e, 0xbc29b42c, 0xbfb917a6, 0xe0000000, 0x3c3e2718, |
| 0x00000000, 0x3ff00000 |
| }; |
| |
| public int[] scTwo = { |
| 0x11111111, 0x3f811111, 0x55555555, 0x3fa55555 |
| }; |
| |
| public int[] scThree = { |
| 0x1a01a01a, 0xbf2a01a0, 0x16c16c17, 0xbf56c16c |
| }; |
| |
| public int[] scOne = { |
| 0x55555555, 0xbfc55555, 0x00000000, 0xbfe00000 |
| }; |
| |
| public int[] piInvTable = { |
| 0x00000000, 0x00000000, 0xa2f9836e, 0x4e441529, 0xfc2757d1, |
| 0xf534ddc0, 0xdb629599, 0x3c439041, 0xfe5163ab, 0xdebbc561, |
| 0xb7246e3a, 0x424dd2e0, 0x06492eea, 0x09d1921c, 0xfe1deb1c, |
| 0xb129a73e, 0xe88235f5, 0x2ebb4484, 0xe99c7026, 0xb45f7e41, |
| 0x3991d639, 0x835339f4, 0x9c845f8b, 0xbdf9283b, 0x1ff897ff, |
| 0xde05980f, 0xef2f118b, 0x5a0a6d1f, 0x6d367ecf, 0x27cb09b7, |
| 0x4f463f66, 0x9e5fea2d, 0x7527bac7, 0xebe5f17b, 0x3d0739f7, |
| 0x8a5292ea, 0x6bfb5fb1, 0x1f8d5d08, 0x56033046, 0xfc7b6bab, |
| 0xf0cfbc21 |
| }; |
| |
| public int[] piFour = { |
| 0x40000000, 0x3fe921fb, 0x18469899, 0x3e64442d |
| }; |
| |
| public int[] piThirtyTwoInv = { |
| 0x6dc9c883, 0x40245f30 |
| }; |
| |
| public int[] shifter = { |
| 0x00000000, 0x43380000 |
| }; |
| |
| public int[] signMask = { |
| 0x00000000, 0x80000000 |
| }; |
| |
| public int[] pThree = { |
| 0x2e037073, 0x3b63198a |
| }; |
| |
| public int[] allOnes = { |
| 0xffffffff, 0x3fefffff |
| }; |
| |
| public int[] twoPowFiftyFive = { |
| 0x00000000, 0x43600000 |
| }; |
| |
| public int[] twoPowFiftyFiveM = { |
| 0x00000000, 0x3c800000 |
| }; |
| |
| public int[] pOne = { |
| 0x54400000, 0x3fb921fb |
| }; |
| |
| public void sinIntrinsic(Register dest, Register value, CompilationResultBuilder crb, AMD64MacroAssembler masm) { |
| ArrayDataPointerConstant oneHalfPtr = new ArrayDataPointerConstant(oneHalf, 16); |
| ArrayDataPointerConstant pTwoPtr = new ArrayDataPointerConstant(pTwo, 16); |
| ArrayDataPointerConstant scFourPtr = new ArrayDataPointerConstant(scFour, 16); |
| ArrayDataPointerConstant cTablePtr = new ArrayDataPointerConstant(cTable, 16); |
| ArrayDataPointerConstant scTwoPtr = new ArrayDataPointerConstant(scTwo, 16); |
| ArrayDataPointerConstant scThreePtr = new ArrayDataPointerConstant(scThree, 16); |
| ArrayDataPointerConstant scOnePtr = new ArrayDataPointerConstant(scOne, 16); |
| ArrayDataPointerConstant piInvTablePtr = new ArrayDataPointerConstant(piInvTable, 16); |
| ArrayDataPointerConstant piFourPtr = new ArrayDataPointerConstant(piFour, 16); |
| ArrayDataPointerConstant piThirtyTwoInvPtr = new ArrayDataPointerConstant(piThirtyTwoInv, 8); |
| ArrayDataPointerConstant shifterPtr = new ArrayDataPointerConstant(shifter, 8); |
| ArrayDataPointerConstant signMaskPtr = new ArrayDataPointerConstant(signMask, 8); |
| ArrayDataPointerConstant pThreePtr = new ArrayDataPointerConstant(pThree, 8); |
| ArrayDataPointerConstant allOnesPtr = new ArrayDataPointerConstant(allOnes, 8); |
| ArrayDataPointerConstant twoPowFiftyFivePtr = new ArrayDataPointerConstant(twoPowFiftyFive, 8); |
| ArrayDataPointerConstant twoPowFiftyFiveMPtr = new ArrayDataPointerConstant(twoPowFiftyFiveM, 8); |
| ArrayDataPointerConstant pOnePtr = new ArrayDataPointerConstant(pOne, 8); |
| |
| Label bb0 = new Label(); |
| Label bb1 = new Label(); |
| Label bb2 = new Label(); |
| Label bb4 = new Label(); |
| Label bb5 = new Label(); |
| Label bb6 = new Label(); |
| Label bb8 = new Label(); |
| Label bb9 = new Label(); |
| Label bb10 = new Label(); |
| Label bb11 = new Label(); |
| Label bb12 = new Label(); |
| Label bb13 = new Label(); |
| Label bb14 = new Label(); |
| Label bb15 = new Label(); |
| |
| Register gpr1 = asRegister(gpr1Temp, AMD64Kind.QWORD); |
| Register gpr2 = asRegister(gpr2Temp, AMD64Kind.QWORD); |
| Register gpr3 = asRegister(rcxTemp, AMD64Kind.QWORD); |
| Register gpr4 = asRegister(gpr4Temp, AMD64Kind.QWORD); |
| Register gpr5 = asRegister(gpr5Temp, AMD64Kind.QWORD); |
| Register gpr6 = asRegister(gpr6Temp, AMD64Kind.QWORD); |
| Register gpr7 = asRegister(gpr7Temp, AMD64Kind.QWORD); |
| Register gpr8 = asRegister(gpr8Temp, AMD64Kind.QWORD); |
| Register gpr9 = asRegister(gpr9Temp, AMD64Kind.QWORD); |
| Register gpr10 = asRegister(gpr10Temp, AMD64Kind.QWORD); |
| |
| Register temp1 = asRegister(xmm1Temp, AMD64Kind.DOUBLE); |
| Register temp2 = asRegister(xmm2Temp, AMD64Kind.DOUBLE); |
| Register temp3 = asRegister(xmm3Temp, AMD64Kind.DOUBLE); |
| Register temp4 = asRegister(xmm4Temp, AMD64Kind.DOUBLE); |
| Register temp5 = asRegister(xmm5Temp, AMD64Kind.DOUBLE); |
| Register temp6 = asRegister(xmm6Temp, AMD64Kind.DOUBLE); |
| Register temp7 = asRegister(xmm7Temp, AMD64Kind.DOUBLE); |
| Register temp8 = asRegister(xmm8Temp, AMD64Kind.DOUBLE); |
| Register temp9 = asRegister(xmm9Temp, AMD64Kind.DOUBLE); |
| |
| AMD64Address stackSlot = (AMD64Address) crb.asAddress(stackTemp); |
| |
| setCrb(crb); |
| masm.movsd(stackSlot, value); |
| if (dest.encoding != value.encoding) { |
| masm.movdqu(dest, value); |
| } |
| |
| masm.leaq(gpr1, stackSlot); |
| masm.movl(gpr1, new AMD64Address(gpr1, 4)); |
| masm.movdq(temp1, externalAddress(piThirtyTwoInvPtr)); // 0x6dc9c883, |
| // 0x40245f30 |
| masm.movdq(temp2, externalAddress(shifterPtr)); // 0x00000000, |
| // 0x43380000 |
| |
| masm.andl(gpr1, 2147418112); |
| masm.subl(gpr1, 808452096); |
| masm.cmpl(gpr1, 281346048); |
| masm.jcc(ConditionFlag.Above, bb0); |
| |
| masm.mulsd(temp1, dest); |
| masm.movdqu(temp5, externalAddress(oneHalfPtr)); // 0x00000000, |
| // 0x3fe00000, |
| // 0x00000000, |
| // 0x3fe00000 |
| masm.movdq(temp4, externalAddress(signMaskPtr)); // 0x00000000, |
| // 0x80000000 |
| masm.pand(temp4, dest); |
| masm.por(temp5, temp4); |
| masm.addpd(temp1, temp5); |
| masm.cvttsd2sil(gpr4, temp1); |
| masm.cvtsi2sdl(temp1, gpr4); |
| masm.movdqu(temp6, externalAddress(pTwoPtr)); // 0x1a600000, |
| // 0x3d90b461, |
| // 0x1a600000, |
| // 0x3d90b461 |
| masm.movq(gpr7, 0x3fb921fb54400000L); |
| masm.movdq(temp3, gpr7); |
| masm.movdqu(temp5, externalAddress(scFourPtr)); // 0xa556c734, |
| // 0x3ec71de3, |
| // 0x1a01a01a, |
| // 0x3efa01a0 |
| masm.pshufd(temp4, dest, 0x44); |
| masm.mulsd(temp3, temp1); |
| if (masm.supports(CPUFeature.SSE3)) { |
| masm.movddup(temp1, temp1); |
| } else { |
| masm.movlhps(temp1, temp1); |
| } |
| masm.andl(gpr4, 63); |
| masm.shll(gpr4, 5); |
| masm.leaq(gpr1, externalAddress(cTablePtr)); |
| masm.addq(gpr1, gpr4); |
| masm.movdqu(temp8, new AMD64Address(gpr1, 0)); |
| masm.mulpd(temp6, temp1); |
| masm.mulsd(temp1, externalAddress(pThreePtr)); // 0x2e037073, |
| // 0x3b63198a |
| masm.subsd(temp4, temp3); |
| masm.subsd(dest, temp3); |
| if (masm.supports(CPUFeature.SSE3)) { |
| masm.movddup(temp3, temp4); |
| } else { |
| masm.movdqu(temp3, temp4); |
| masm.movlhps(temp3, temp3); |
| } |
| masm.subsd(temp4, temp6); |
| masm.pshufd(dest, dest, 0x44); |
| masm.pshufd(temp7, temp8, 0xE); |
| masm.movdqu(temp2, temp8); |
| masm.movdqu(temp9, temp7); |
| masm.mulpd(temp5, dest); |
| masm.subpd(dest, temp6); |
| masm.mulsd(temp7, temp4); |
| masm.subsd(temp3, temp4); |
| masm.mulpd(temp5, dest); |
| masm.mulpd(dest, dest); |
| masm.subsd(temp3, temp6); |
| masm.movdqu(temp6, externalAddress(scTwoPtr)); // 0x11111111, |
| // 0x3f811111, |
| // 0x55555555, |
| // 0x3fa55555 |
| masm.subsd(temp1, temp3); |
| masm.movdq(temp3, new AMD64Address(gpr1, 24)); |
| masm.addsd(temp2, temp3); |
| masm.subsd(temp7, temp2); |
| masm.mulsd(temp2, temp4); |
| masm.mulpd(temp6, dest); |
| masm.mulsd(temp3, temp4); |
| masm.mulpd(temp2, dest); |
| masm.mulpd(dest, dest); |
| masm.addpd(temp5, externalAddress(scThreePtr)); // 0x1a01a01a, |
| // 0xbf2a01a0, |
| // 0x16c16c17, |
| // 0xbf56c16c |
| masm.mulsd(temp4, temp8); |
| masm.addpd(temp6, externalAddress(scOnePtr)); // 0x55555555, |
| // 0xbfc55555, |
| // 0x00000000, |
| // 0xbfe00000 |
| masm.mulpd(temp5, dest); |
| masm.movdqu(dest, temp3); |
| masm.addsd(temp3, temp9); |
| masm.mulpd(temp1, temp7); |
| masm.movdqu(temp7, temp4); |
| masm.addsd(temp4, temp3); |
| masm.addpd(temp6, temp5); |
| masm.subsd(temp9, temp3); |
| masm.subsd(temp3, temp4); |
| masm.addsd(temp1, new AMD64Address(gpr1, 16)); |
| masm.mulpd(temp6, temp2); |
| masm.addsd(temp9, dest); |
| masm.addsd(temp3, temp7); |
| masm.addsd(temp1, temp9); |
| masm.addsd(temp1, temp3); |
| masm.addsd(temp1, temp6); |
| masm.unpckhpd(temp6, temp6); |
| masm.movdqu(dest, temp4); |
| masm.addsd(temp1, temp6); |
| masm.addsd(dest, temp1); |
| masm.jmp(bb15); |
| |
| masm.bind(bb14); |
| masm.xorpd(temp1, temp1); |
| masm.xorpd(dest, dest); |
| masm.divsd(dest, temp1); |
| masm.jmp(bb15); |
| |
| masm.bind(bb0); |
| masm.jcc(ConditionFlag.Greater, bb1); |
| |
| masm.shrl(gpr1, 20); |
| masm.cmpl(gpr1, 3325); |
| masm.jcc(ConditionFlag.NotEqual, bb2); |
| |
| masm.mulsd(dest, externalAddress(allOnesPtr)); // 0xffffffff, |
| // 0x3fefffff |
| masm.jmp(bb15); |
| |
| masm.bind(bb2); |
| masm.movdq(temp3, externalAddress(twoPowFiftyFivePtr)); // 0x00000000, |
| // 0x43600000 |
| masm.mulsd(temp3, dest); |
| masm.subsd(temp3, dest); |
| masm.mulsd(temp3, externalAddress(twoPowFiftyFiveMPtr)); // 0x00000000, |
| // 0x3c800000 |
| masm.jmp(bb15); |
| |
| masm.bind(bb1); |
| masm.pextrw(gpr3, dest, 3); |
| masm.andl(gpr3, 32752); |
| masm.cmpl(gpr3, 32752); |
| masm.jcc(ConditionFlag.Equal, bb14); |
| |
| masm.subl(gpr3, 16224); |
| masm.shrl(gpr3, 7); |
| masm.andl(gpr3, 65532); |
| masm.leaq(gpr10, externalAddress(piInvTablePtr)); |
| masm.addq(gpr3, gpr10); |
| masm.movdq(gpr1, dest); |
| masm.movl(gpr9, new AMD64Address(gpr3, 20)); |
| masm.movl(gpr7, new AMD64Address(gpr3, 24)); |
| masm.movl(gpr4, gpr1); |
| masm.shrq(gpr1, 21); |
| masm.orl(gpr1, Integer.MIN_VALUE); |
| masm.shrl(gpr1, 11); |
| masm.movl(gpr8, gpr9); |
| masm.imulq(gpr9, gpr4); |
| masm.imulq(gpr8, gpr1); |
| masm.imulq(gpr7, gpr1); |
| masm.movl(gpr5, new AMD64Address(gpr3, 16)); |
| masm.movl(gpr6, new AMD64Address(gpr3, 12)); |
| masm.movl(gpr10, gpr9); |
| masm.shrq(gpr9, 32); |
| masm.addq(gpr8, gpr9); |
| masm.addq(gpr10, gpr7); |
| masm.movl(gpr7, gpr10); |
| masm.shrq(gpr10, 32); |
| masm.addq(gpr8, gpr10); |
| masm.movl(gpr9, gpr5); |
| masm.imulq(gpr5, gpr4); |
| masm.imulq(gpr9, gpr1); |
| masm.movl(gpr10, gpr6); |
| masm.imulq(gpr6, gpr4); |
| masm.movl(gpr2, gpr5); |
| masm.shrq(gpr5, 32); |
| masm.addq(gpr8, gpr2); |
| masm.movl(gpr2, gpr8); |
| masm.shrq(gpr8, 32); |
| masm.addq(gpr9, gpr5); |
| masm.addq(gpr9, gpr8); |
| masm.shlq(gpr2, 32); |
| masm.orq(gpr7, gpr2); |
| masm.imulq(gpr10, gpr1); |
| masm.movl(gpr8, new AMD64Address(gpr3, 8)); |
| masm.movl(gpr5, new AMD64Address(gpr3, 4)); |
| masm.movl(gpr2, gpr6); |
| masm.shrq(gpr6, 32); |
| masm.addq(gpr9, gpr2); |
| masm.movl(gpr2, gpr9); |
| masm.shrq(gpr9, 32); |
| masm.addq(gpr10, gpr6); |
| masm.addq(gpr10, gpr9); |
| masm.movq(gpr6, gpr8); |
| masm.imulq(gpr8, gpr4); |
| masm.imulq(gpr6, gpr1); |
| masm.movl(gpr9, gpr8); |
| masm.shrq(gpr8, 32); |
| masm.addq(gpr10, gpr9); |
| masm.movl(gpr9, gpr10); |
| masm.shrq(gpr10, 32); |
| masm.addq(gpr6, gpr8); |
| masm.addq(gpr6, gpr10); |
| masm.movq(gpr8, gpr5); |
| masm.imulq(gpr5, gpr4); |
| masm.imulq(gpr8, gpr1); |
| masm.shlq(gpr9, 32); |
| masm.orq(gpr9, gpr2); |
| masm.movl(gpr1, new AMD64Address(gpr3, 0)); |
| masm.movl(gpr10, gpr5); |
| masm.shrq(gpr5, 32); |
| masm.addq(gpr6, gpr10); |
| masm.movl(gpr10, gpr6); |
| masm.shrq(gpr6, 32); |
| masm.addq(gpr8, gpr5); |
| masm.addq(gpr8, gpr6); |
| masm.imulq(gpr4, gpr1); |
| masm.pextrw(gpr2, dest, 3); |
| masm.leaq(gpr6, externalAddress(piInvTablePtr)); |
| masm.subq(gpr3, gpr6); |
| masm.addl(gpr3, gpr3); |
| masm.addl(gpr3, gpr3); |
| masm.addl(gpr3, gpr3); |
| masm.addl(gpr3, 19); |
| masm.movl(gpr5, 32768); |
| masm.andl(gpr5, gpr2); |
| masm.shrl(gpr2, 4); |
| masm.andl(gpr2, 2047); |
| masm.subl(gpr2, 1023); |
| masm.subl(gpr3, gpr2); |
| masm.addq(gpr8, gpr4); |
| masm.movl(gpr4, gpr3); |
| masm.addl(gpr4, 32); |
| masm.cmpl(gpr3, 1); |
| masm.jcc(ConditionFlag.Less, bb4); |
| |
| masm.negl(gpr3); |
| masm.addl(gpr3, 29); |
| masm.shll(gpr8); |
| masm.movl(gpr6, gpr8); |
| masm.andl(gpr8, 536870911); |
| masm.testl(gpr8, 268435456); |
| masm.jcc(ConditionFlag.NotEqual, bb5); |
| |
| masm.shrl(gpr8); |
| masm.movl(gpr2, 0); |
| masm.shlq(gpr8, 32); |
| masm.orq(gpr8, gpr10); |
| |
| masm.bind(bb6); |
| |
| masm.cmpq(gpr8, 0); |
| masm.jcc(ConditionFlag.Equal, bb8); |
| |
| masm.bind(bb9); |
| masm.bsrq(gpr10, gpr8); |
| masm.movl(gpr3, 29); |
| masm.subl(gpr3, gpr10); |
| masm.jcc(ConditionFlag.LessEqual, bb10); |
| |
| masm.shlq(gpr8); |
| masm.movq(gpr1, gpr9); |
| masm.shlq(gpr9); |
| masm.addl(gpr4, gpr3); |
| masm.negl(gpr3); |
| masm.addl(gpr3, 64); |
| masm.shrq(gpr1); |
| masm.shrq(gpr7); |
| masm.orq(gpr8, gpr1); |
| masm.orq(gpr9, gpr7); |
| |
| masm.bind(bb11); |
| masm.cvtsi2sdq(dest, gpr8); |
| masm.shrq(gpr9, 1); |
| masm.cvtsi2sdq(temp3, gpr9); |
| masm.xorpd(temp4, temp4); |
| masm.shll(gpr4, 4); |
| masm.negl(gpr4); |
| masm.addl(gpr4, 16368); |
| masm.orl(gpr4, gpr5); |
| masm.xorl(gpr4, gpr2); |
| masm.pinsrw(temp4, gpr4, 3); |
| masm.leaq(gpr1, externalAddress(piFourPtr)); |
| masm.movdqu(temp2, new AMD64Address(gpr1, 0)); // 0x40000000, |
| // 0x3fe921fb, |
| // 0x18469899, |
| // 0x3e64442d |
| masm.xorpd(temp5, temp5); |
| masm.subl(gpr4, 1008); |
| masm.pinsrw(temp5, gpr4, 3); |
| masm.mulsd(dest, temp4); |
| masm.shll(gpr5, 16); |
| masm.sarl(gpr5, 31); |
| masm.mulsd(temp3, temp5); |
| masm.movdqu(temp1, dest); |
| masm.pshufd(temp6, temp2, 0xE); |
| masm.mulsd(dest, temp2); |
| masm.shrl(gpr6, 29); |
| masm.addsd(temp1, temp3); |
| masm.mulsd(temp3, temp2); |
| masm.addl(gpr6, gpr5); |
| masm.xorl(gpr6, gpr5); |
| masm.mulsd(temp6, temp1); |
| masm.movl(gpr1, gpr6); |
| masm.addsd(temp6, temp3); |
| masm.movdqu(temp2, dest); |
| masm.addsd(dest, temp6); |
| masm.subsd(temp2, dest); |
| masm.addsd(temp6, temp2); |
| |
| masm.bind(bb12); |
| masm.movdq(temp1, externalAddress(piThirtyTwoInvPtr)); // 0x6dc9c883, |
| // 0x40245f30 |
| masm.mulsd(temp1, dest); |
| masm.movdq(temp5, externalAddress(oneHalfPtr)); // 0x00000000, |
| // 0x3fe00000, |
| // 0x00000000, |
| // 0x3fe00000 |
| masm.movdq(temp4, externalAddress(signMaskPtr)); // 0x00000000, |
| // 0x80000000 |
| masm.pand(temp4, dest); |
| masm.por(temp5, temp4); |
| masm.addpd(temp1, temp5); |
| masm.cvttsd2sil(gpr4, temp1); |
| masm.cvtsi2sdl(temp1, gpr4); |
| masm.movdq(temp3, externalAddress(pOnePtr)); // 0x54400000, |
| // 0x3fb921fb |
| masm.movdqu(temp2, externalAddress(pTwoPtr)); // 0x1a600000, |
| // 0x3d90b461, |
| // 0x1a600000, |
| // 0x3d90b461 |
| masm.mulsd(temp3, temp1); |
| masm.unpcklpd(temp1, temp1); |
| masm.shll(gpr1, 3); |
| masm.addl(gpr4, 1865216); |
| masm.movdqu(temp4, dest); |
| masm.addl(gpr4, gpr1); |
| masm.andl(gpr4, 63); |
| masm.movdqu(temp5, externalAddress(scFourPtr)); // 0x54400000, |
| // 0x3fb921fb |
| masm.leaq(gpr1, externalAddress(cTablePtr)); |
| masm.shll(gpr4, 5); |
| masm.addq(gpr1, gpr4); |
| masm.movdqu(temp8, new AMD64Address(gpr1, 0)); |
| masm.mulpd(temp2, temp1); |
| masm.subsd(dest, temp3); |
| masm.mulsd(temp1, externalAddress(pThreePtr)); // 0x2e037073, |
| // 0x3b63198a |
| masm.subsd(temp4, temp3); |
| masm.unpcklpd(dest, dest); |
| masm.movdqu(temp3, temp4); |
| masm.subsd(temp4, temp2); |
| masm.mulpd(temp5, dest); |
| masm.subpd(dest, temp2); |
| masm.pshufd(temp7, temp8, 0xE); |
| masm.movdqu(temp9, temp7); |
| masm.mulsd(temp7, temp4); |
| masm.subsd(temp3, temp4); |
| masm.mulpd(temp5, dest); |
| masm.mulpd(dest, dest); |
| masm.subsd(temp3, temp2); |
| masm.movdqu(temp2, temp8); |
| masm.subsd(temp1, temp3); |
| masm.movdq(temp3, new AMD64Address(gpr1, 24)); |
| masm.addsd(temp2, temp3); |
| masm.subsd(temp7, temp2); |
| masm.subsd(temp1, temp6); |
| masm.movdqu(temp6, externalAddress(scTwoPtr)); // 0x11111111, |
| // 0x3f811111, |
| // 0x55555555, |
| // 0x3fa55555 |
| masm.mulsd(temp2, temp4); |
| masm.mulpd(temp6, dest); |
| masm.mulsd(temp3, temp4); |
| masm.mulpd(temp2, dest); |
| masm.mulpd(dest, dest); |
| masm.addpd(temp5, externalAddress(scThreePtr)); // 0x1a01a01a, |
| // 0xbf2a01a0, |
| // 0x16c16c17, |
| // 0xbf56c16c |
| masm.mulsd(temp4, temp8); |
| masm.addpd(temp6, externalAddress(scOnePtr)); // 0x55555555, |
| // 0xbfc55555, |
| // 0x00000000, |
| // 0xbfe00000 |
| masm.mulpd(temp5, dest); |
| masm.movdqu(dest, temp3); |
| masm.addsd(temp3, temp9); |
| masm.mulpd(temp1, temp7); |
| masm.movdqu(temp7, temp4); |
| masm.addsd(temp4, temp3); |
| masm.addpd(temp6, temp5); |
| masm.subsd(temp9, temp3); |
| masm.subsd(temp3, temp4); |
| masm.addsd(temp1, new AMD64Address(gpr1, 16)); |
| masm.mulpd(temp6, temp2); |
| masm.addsd(temp9, dest); |
| masm.addsd(temp3, temp7); |
| masm.addsd(temp1, temp9); |
| masm.addsd(temp1, temp3); |
| masm.addsd(temp1, temp6); |
| masm.unpckhpd(temp6, temp6); |
| masm.movdqu(dest, temp4); |
| masm.addsd(temp1, temp6); |
| masm.addsd(dest, temp1); |
| masm.jmp(bb15); |
| |
| masm.bind(bb8); |
| masm.addl(gpr4, 64); |
| masm.movq(gpr8, gpr9); |
| masm.movq(gpr9, gpr7); |
| masm.movl(gpr7, 0); |
| masm.cmpq(gpr8, 0); |
| masm.jcc(ConditionFlag.NotEqual, bb9); |
| |
| masm.addl(gpr4, 64); |
| masm.movq(gpr8, gpr9); |
| masm.movq(gpr9, gpr7); |
| masm.cmpq(gpr8, 0); |
| masm.jcc(ConditionFlag.NotEqual, bb9); |
| |
| masm.xorpd(dest, dest); |
| masm.xorpd(temp6, temp6); |
| masm.jmp(bb12); |
| |
| masm.bind(bb10); |
| masm.jcc(ConditionFlag.Equal, bb11); |
| |
| masm.negl(gpr3); |
| masm.shrq(gpr9); |
| masm.movq(gpr1, gpr8); |
| masm.shrq(gpr8); |
| masm.subl(gpr4, gpr3); |
| masm.negl(gpr3); |
| masm.addl(gpr3, 64); |
| masm.shlq(gpr1); |
| masm.orq(gpr9, gpr1); |
| masm.jmp(bb11); |
| |
| masm.bind(bb4); |
| masm.negl(gpr3); |
| masm.shlq(gpr8, 32); |
| masm.orq(gpr8, gpr10); |
| masm.shlq(gpr8); |
| masm.movq(gpr6, gpr8); |
| masm.testl(gpr8, Integer.MIN_VALUE); |
| masm.jcc(ConditionFlag.NotEqual, bb13); |
| |
| masm.shrl(gpr8); |
| masm.movl(gpr2, 0); |
| masm.shrq(gpr6, 3); |
| masm.jmp(bb6); |
| |
| masm.bind(bb5); |
| masm.shrl(gpr8); |
| masm.movl(gpr2, 536870912); |
| masm.shrl(gpr2); |
| masm.shlq(gpr8, 32); |
| masm.orq(gpr8, gpr10); |
| masm.shlq(gpr2, 32); |
| masm.addl(gpr6, 536870912); |
| masm.movl(gpr3, 0); |
| masm.movl(gpr10, 0); |
| masm.subq(gpr3, gpr7); |
| masm.sbbq(gpr10, gpr9); |
| masm.sbbq(gpr2, gpr8); |
| masm.movq(gpr7, gpr3); |
| masm.movq(gpr9, gpr10); |
| masm.movq(gpr8, gpr2); |
| masm.movl(gpr2, 32768); |
| masm.jmp(bb6); |
| |
| masm.bind(bb13); |
| masm.shrl(gpr8); |
| masm.movq(gpr2, 0x100000000L); |
| masm.shrq(gpr2); |
| masm.movl(gpr3, 0); |
| masm.movl(gpr10, 0); |
| masm.subq(gpr3, gpr7); |
| masm.sbbq(gpr10, gpr9); |
| masm.sbbq(gpr2, gpr8); |
| masm.movq(gpr7, gpr3); |
| masm.movq(gpr9, gpr10); |
| masm.movq(gpr8, gpr2); |
| masm.movl(gpr2, 32768); |
| masm.shrq(gpr6, 3); |
| masm.addl(gpr6, 536870912); |
| masm.jmp(bb6); |
| |
| masm.bind(bb15); |
| } |
| |
| /* |
| * Copyright (c) 2014, 2016, Intel Corporation. All rights reserved. Intel Math Library (LIBM) |
| * Source Code |
| * |
| * ALGORITHM DESCRIPTION - COS() --------------------- |
| * |
| * 1. RANGE REDUCTION |
| * |
| * We perform an initial range reduction from X to r with |
| * |
| * X =~= N * pi/32 + r |
| * |
| * so that |r| <= pi/64 + epsilon. We restrict inputs to those where |N| <= 932560. Beyond this, |
| * the range reduction is insufficiently accurate. For extremely small inputs, denormalization |
| * can occur internally, impacting performance. This means that the main path is actually only |
| * taken for 2^-252 <= |X| < 90112. |
| * |
| * To avoid branches, we perform the range reduction to full accuracy each time. |
| * |
| * X - N * (P_1 + P_2 + P_3) |
| * |
| * where P_1 and P_2 are 32-bit numbers (so multiplication by N is exact) and P_3 is a 53-bit |
| * number. Together, these approximate pi well enough for all cases in the restricted range. |
| * |
| * The main reduction sequence is: |
| * |
| * y = 32/pi * x N = integer(y) (computed by adding and subtracting off SHIFTER) |
| * |
| * m_1 = N * P_1 m_2 = N * P_2 r_1 = x - m_1 r = r_1 - m_2 (this r can be used for most of the |
| * calculation) |
| * |
| * c_1 = r_1 - r m_3 = N * P_3 c_2 = c_1 - m_2 c = c_2 - m_3 |
| * |
| * 2. MAIN ALGORITHM |
| * |
| * The algorithm uses a table lookup based on B = M * pi / 32 where M = N mod 64. The stored |
| * values are: sigma closest power of 2 to cos(B) C_hl 53-bit cos(B) - sigma S_hi + S_lo 2 * |
| * 53-bit sin(B) |
| * |
| * The computation is organized as follows: |
| * |
| * sin(B + r + c) = [sin(B) + sigma * r] + r * (cos(B) - sigma) + sin(B) * [cos(r + c) - 1] + |
| * cos(B) * [sin(r + c) - r] |
| * |
| * which is approximately: |
| * |
| * [S_hi + sigma * r] + C_hl * r + S_lo + S_hi * [(cos(r) - 1) - r * c] + (C_hl + sigma) * |
| * [(sin(r) - r) + c] |
| * |
| * and this is what is actually computed. We separate this sum into four parts: |
| * |
| * hi + med + pols + corr |
| * |
| * where |
| * |
| * hi = S_hi + sigma r med = C_hl * r pols = S_hi * (cos(r) - 1) + (C_hl + sigma) * (sin(r) - r) |
| * corr = S_lo + c * ((C_hl + sigma) - S_hi * r) |
| * |
| * 3. POLYNOMIAL |
| * |
| * The polynomial S_hi * (cos(r) - 1) + (C_hl + sigma) * (sin(r) - r) can be rearranged freely, |
| * since it is quite small, so we exploit parallelism to the fullest. |
| * |
| * psc4 = SC_4 * r_1 msc4 = psc4 * r r2 = r * r msc2 = SC_2 * r2 r4 = r2 * r2 psc3 = SC_3 + msc4 |
| * psc1 = SC_1 + msc2 msc3 = r4 * psc3 sincospols = psc1 + msc3 pols = sincospols * <S_hi * r^2 |
| * | (C_hl + sigma) * r^3> |
| * |
| * 4. CORRECTION TERM |
| * |
| * This is where the "c" component of the range reduction is taken into account; recall that |
| * just "r" is used for most of the calculation. |
| * |
| * -c = m_3 - c_2 -d = S_hi * r - (C_hl + sigma) corr = -c * -d + S_lo |
| * |
| * 5. COMPENSATED SUMMATIONS |
| * |
| * The two successive compensated summations add up the high and medium parts, leaving just the |
| * low parts to add up at the end. |
| * |
| * rs = sigma * r res_int = S_hi + rs k_0 = S_hi - res_int k_2 = k_0 + rs med = C_hl * r res_hi |
| * = res_int + med k_1 = res_int - res_hi k_3 = k_1 + med |
| * |
| * 6. FINAL SUMMATION |
| * |
| * We now add up all the small parts: |
| * |
| * res_lo = pols(hi) + pols(lo) + corr + k_1 + k_3 |
| * |
| * Now the overall result is just: |
| * |
| * res_hi + res_lo |
| * |
| * 7. SMALL ARGUMENTS |
| * |
| * Inputs with |X| < 2^-252 are treated specially as 1 - |x|. |
| * |
| * Special cases: cos(NaN) = quiet NaN, and raise invalid exception cos(INF) = NaN and raise |
| * invalid exception cos(0) = 1 |
| * |
| */ |
| |
| public int[] one = { |
| 0x00000000, 0x3ff00000 |
| }; |
| |
| public void cosIntrinsic(Register dest, Register value, CompilationResultBuilder crb, AMD64MacroAssembler masm) { |
| ArrayDataPointerConstant oneHalfPtr = new ArrayDataPointerConstant(oneHalf, 16); |
| ArrayDataPointerConstant pTwoPtr = new ArrayDataPointerConstant(pTwo, 16); |
| ArrayDataPointerConstant scFourPtr = new ArrayDataPointerConstant(scFour, 16); |
| ArrayDataPointerConstant cTablePtr = new ArrayDataPointerConstant(cTable, 16); |
| ArrayDataPointerConstant scTwoPtr = new ArrayDataPointerConstant(scTwo, 16); |
| ArrayDataPointerConstant scThreePtr = new ArrayDataPointerConstant(scThree, 16); |
| ArrayDataPointerConstant scOnePtr = new ArrayDataPointerConstant(scOne, 16); |
| ArrayDataPointerConstant piInvTablePtr = new ArrayDataPointerConstant(piInvTable, 16); |
| ArrayDataPointerConstant piFourPtr = new ArrayDataPointerConstant(piFour, 16); |
| ArrayDataPointerConstant piThirtyTwoInvPtr = new ArrayDataPointerConstant(piThirtyTwoInv, 8); |
| ArrayDataPointerConstant signMaskPtr = new ArrayDataPointerConstant(signMask, 8); |
| ArrayDataPointerConstant pThreePtr = new ArrayDataPointerConstant(pThree, 8); |
| ArrayDataPointerConstant pOnePtr = new ArrayDataPointerConstant(pOne, 8); |
| ArrayDataPointerConstant onePtr = new ArrayDataPointerConstant(one, 8); |
| |
| Label bb0 = new Label(); |
| Label bb1 = new Label(); |
| Label bb3 = new Label(); |
| Label bb4 = new Label(); |
| Label bb5 = new Label(); |
| Label bb6 = new Label(); |
| Label bb7 = new Label(); |
| Label bb8 = new Label(); |
| Label bb9 = new Label(); |
| Label bb10 = new Label(); |
| Label bb11 = new Label(); |
| Label bb12 = new Label(); |
| Label bb13 = new Label(); |
| Label bb14 = new Label(); |
| |
| Register gpr1 = asRegister(gpr1Temp, AMD64Kind.QWORD); |
| Register gpr2 = asRegister(gpr2Temp, AMD64Kind.QWORD); |
| Register gpr3 = asRegister(rcxTemp, AMD64Kind.QWORD); |
| Register gpr4 = asRegister(gpr4Temp, AMD64Kind.QWORD); |
| Register gpr5 = asRegister(gpr5Temp, AMD64Kind.QWORD); |
| Register gpr6 = asRegister(gpr6Temp, AMD64Kind.QWORD); |
| Register gpr7 = asRegister(gpr7Temp, AMD64Kind.QWORD); |
| Register gpr8 = asRegister(gpr8Temp, AMD64Kind.QWORD); |
| Register gpr9 = asRegister(gpr9Temp, AMD64Kind.QWORD); |
| Register gpr10 = asRegister(gpr10Temp, AMD64Kind.QWORD); |
| |
| Register temp1 = asRegister(xmm1Temp, AMD64Kind.DOUBLE); |
| Register temp2 = asRegister(xmm2Temp, AMD64Kind.DOUBLE); |
| Register temp3 = asRegister(xmm3Temp, AMD64Kind.DOUBLE); |
| Register temp4 = asRegister(xmm4Temp, AMD64Kind.DOUBLE); |
| Register temp5 = asRegister(xmm5Temp, AMD64Kind.DOUBLE); |
| Register temp6 = asRegister(xmm6Temp, AMD64Kind.DOUBLE); |
| Register temp7 = asRegister(xmm7Temp, AMD64Kind.DOUBLE); |
| Register temp8 = asRegister(xmm8Temp, AMD64Kind.DOUBLE); |
| Register temp9 = asRegister(xmm9Temp, AMD64Kind.DOUBLE); |
| |
| AMD64Address stackSlot = (AMD64Address) crb.asAddress(stackTemp); |
| |
| setCrb(crb); |
| masm.movdq(stackSlot, value); |
| if (dest.encoding != value.encoding) { |
| masm.movdqu(dest, value); |
| } |
| |
| masm.leaq(gpr1, stackSlot); |
| masm.movl(gpr1, new AMD64Address(gpr1, 4)); |
| masm.movdq(temp1, externalAddress(piThirtyTwoInvPtr)); // 0x6dc9c883, |
| // 0x40245f30 |
| |
| masm.andl(gpr1, 2147418112); |
| masm.subl(gpr1, 808452096); |
| masm.cmpl(gpr1, 281346048); |
| masm.jcc(ConditionFlag.Above, bb0); |
| |
| masm.mulsd(temp1, dest); |
| masm.movdqu(temp5, externalAddress(oneHalfPtr)); // 0x00000000, |
| // 0x3fe00000, |
| // 0x00000000, |
| // 0x3fe00000 |
| masm.movdq(temp4, externalAddress(signMaskPtr)); // 0x00000000, |
| // 0x80000000 |
| masm.pand(temp4, dest); |
| masm.por(temp5, temp4); |
| masm.addpd(temp1, temp5); |
| masm.cvttsd2sil(gpr4, temp1); |
| masm.cvtsi2sdl(temp1, gpr4); |
| masm.movdqu(temp2, externalAddress(pTwoPtr)); // 0x1a600000, |
| // 0x3d90b461, |
| // 0x1a600000, |
| // 0x3d90b461 |
| masm.movdq(temp3, externalAddress(pOnePtr)); // 0x54400000, |
| // 0x3fb921fb |
| masm.mulsd(temp3, temp1); |
| masm.unpcklpd(temp1, temp1); |
| masm.addq(gpr4, 1865232); |
| masm.movdqu(temp4, dest); |
| masm.andq(gpr4, 63); |
| masm.movdqu(temp5, externalAddress(scFourPtr)); // 0xa556c734, |
| // 0x3ec71de3, |
| // 0x1a01a01a, |
| // 0x3efa01a0 |
| masm.leaq(gpr1, externalAddress(cTablePtr)); |
| masm.shlq(gpr4, 5); |
| masm.addq(gpr1, gpr4); |
| masm.movdqu(temp8, new AMD64Address(gpr1, 0)); |
| masm.mulpd(temp2, temp1); |
| masm.subsd(dest, temp3); |
| masm.mulsd(temp1, externalAddress(pThreePtr)); // 0x2e037073, |
| // 0x3b63198a |
| masm.subsd(temp4, temp3); |
| masm.unpcklpd(dest, dest); |
| masm.movdqu(temp3, temp4); |
| masm.subsd(temp4, temp2); |
| masm.mulpd(temp5, dest); |
| masm.subpd(dest, temp2); |
| masm.pshufd(temp7, temp8, 0xE); |
| masm.movdqu(temp6, externalAddress(scTwoPtr)); // 0x11111111, |
| // 0x3f811111, |
| // 0x55555555, |
| // 0x3fa55555 |
| masm.mulsd(temp7, temp4); |
| masm.subsd(temp3, temp4); |
| masm.mulpd(temp5, dest); |
| masm.mulpd(dest, dest); |
| masm.subsd(temp3, temp2); |
| masm.movdqu(temp2, temp8); |
| masm.subsd(temp1, temp3); |
| masm.movdq(temp3, new AMD64Address(gpr1, 24)); |
| masm.addsd(temp2, temp3); |
| masm.subsd(temp7, temp2); |
| masm.mulsd(temp2, temp4); |
| masm.mulpd(temp6, dest); |
| masm.mulsd(temp3, temp4); |
| masm.mulpd(temp2, dest); |
| masm.mulpd(dest, dest); |
| masm.addpd(temp5, externalAddress(scThreePtr)); // 0x1a01a01a, |
| // 0xbf2a01a0, |
| // 0x16c16c17, |
| // 0xbf56c16c |
| masm.mulsd(temp4, temp8); |
| masm.pshufd(temp9, temp8, 0xE); |
| masm.addpd(temp6, externalAddress(scOnePtr)); // 0x55555555, |
| // 0xbfc55555, |
| // 0x00000000, |
| // 0xbfe00000 |
| masm.mulpd(temp5, dest); |
| masm.movdqu(dest, temp3); |
| masm.addsd(temp3, temp9); |
| masm.mulpd(temp1, temp7); |
| masm.movdqu(temp7, temp4); |
| masm.addsd(temp4, temp3); |
| masm.addpd(temp6, temp5); |
| masm.subsd(temp9, temp3); |
| masm.subsd(temp3, temp4); |
| masm.addsd(temp1, new AMD64Address(gpr1, 16)); |
| masm.mulpd(temp6, temp2); |
| masm.addsd(dest, temp9); |
| masm.addsd(temp3, temp7); |
| masm.addsd(dest, temp1); |
| masm.addsd(dest, temp3); |
| masm.addsd(dest, temp6); |
| masm.unpckhpd(temp6, temp6); |
| masm.addsd(dest, temp6); |
| masm.addsd(dest, temp4); |
| masm.jmp(bb13); |
| |
| masm.bind(bb14); |
| masm.xorpd(temp1, temp1); |
| masm.xorpd(dest, dest); |
| masm.divsd(dest, temp1); |
| masm.jmp(bb13); |
| |
| masm.bind(bb0); |
| masm.jcc(ConditionFlag.Greater, bb1); |
| |
| masm.pextrw(gpr1, dest, 3); |
| masm.andl(gpr1, 32767); |
| masm.pinsrw(dest, gpr1, 3); |
| masm.movdq(temp1, externalAddress(onePtr)); // 0x00000000, |
| // 0x3ff00000 |
| masm.subsd(temp1, dest); |
| masm.movdqu(dest, temp1); |
| masm.jmp(bb13); |
| |
| masm.bind(bb1); |
| masm.pextrw(gpr3, dest, 3); |
| masm.andl(gpr3, 32752); |
| masm.cmpl(gpr3, 32752); |
| masm.jcc(ConditionFlag.Equal, bb14); |
| |
| masm.subl(gpr3, 16224); |
| masm.shrl(gpr3, 7); |
| masm.andl(gpr3, 65532); |
| masm.leaq(gpr10, externalAddress(piInvTablePtr)); |
| masm.addq(gpr3, gpr10); |
| masm.movdq(gpr1, dest); |
| masm.movl(gpr9, new AMD64Address(gpr3, 20)); |
| masm.movl(gpr7, new AMD64Address(gpr3, 24)); |
| masm.movl(gpr4, gpr1); |
| masm.shrq(gpr1, 21); |
| masm.orl(gpr1, Integer.MIN_VALUE); |
| masm.shrl(gpr1, 11); |
| masm.movl(gpr8, gpr9); |
| masm.imulq(gpr9, gpr4); |
| masm.imulq(gpr8, gpr1); |
| masm.imulq(gpr7, gpr1); |
| masm.movl(gpr5, new AMD64Address(gpr3, 16)); |
| masm.movl(gpr6, new AMD64Address(gpr3, 12)); |
| masm.movl(gpr10, gpr9); |
| masm.shrq(gpr9, 32); |
| masm.addq(gpr8, gpr9); |
| masm.addq(gpr10, gpr7); |
| masm.movl(gpr7, gpr10); |
| masm.shrq(gpr10, 32); |
| masm.addq(gpr8, gpr10); |
| masm.movl(gpr9, gpr5); |
| masm.imulq(gpr5, gpr4); |
| masm.imulq(gpr9, gpr1); |
| masm.movl(gpr10, gpr6); |
| masm.imulq(gpr6, gpr4); |
| masm.movl(gpr2, gpr5); |
| masm.shrq(gpr5, 32); |
| masm.addq(gpr8, gpr2); |
| masm.movl(gpr2, gpr8); |
| masm.shrq(gpr8, 32); |
| masm.addq(gpr9, gpr5); |
| masm.addq(gpr9, gpr8); |
| masm.shlq(gpr2, 32); |
| masm.orq(gpr7, gpr2); |
| masm.imulq(gpr10, gpr1); |
| masm.movl(gpr8, new AMD64Address(gpr3, 8)); |
| masm.movl(gpr5, new AMD64Address(gpr3, 4)); |
| masm.movl(gpr2, gpr6); |
| masm.shrq(gpr6, 32); |
| masm.addq(gpr9, gpr2); |
| masm.movl(gpr2, gpr9); |
| masm.shrq(gpr9, 32); |
| masm.addq(gpr10, gpr6); |
| masm.addq(gpr10, gpr9); |
| masm.movq(gpr6, gpr8); |
| masm.imulq(gpr8, gpr4); |
| masm.imulq(gpr6, gpr1); |
| masm.movl(gpr9, gpr8); |
| masm.shrq(gpr8, 32); |
| masm.addq(gpr10, gpr9); |
| masm.movl(gpr9, gpr10); |
| masm.shrq(gpr10, 32); |
| masm.addq(gpr6, gpr8); |
| masm.addq(gpr6, gpr10); |
| masm.movq(gpr8, gpr5); |
| masm.imulq(gpr5, gpr4); |
| masm.imulq(gpr8, gpr1); |
| masm.shlq(gpr9, 32); |
| masm.orq(gpr9, gpr2); |
| masm.movl(gpr1, new AMD64Address(gpr3, 0)); |
| masm.movl(gpr10, gpr5); |
| masm.shrq(gpr5, 32); |
| masm.addq(gpr6, gpr10); |
| masm.movl(gpr10, gpr6); |
| masm.shrq(gpr6, 32); |
| masm.addq(gpr8, gpr5); |
| masm.addq(gpr8, gpr6); |
| masm.imulq(gpr4, gpr1); |
| masm.pextrw(gpr2, dest, 3); |
| masm.leaq(gpr6, externalAddress(piInvTablePtr)); |
| masm.subq(gpr3, gpr6); |
| masm.addl(gpr3, gpr3); |
| masm.addl(gpr3, gpr3); |
| masm.addl(gpr3, gpr3); |
| masm.addl(gpr3, 19); |
| masm.movl(gpr5, 32768); |
| masm.andl(gpr5, gpr2); |
| masm.shrl(gpr2, 4); |
| masm.andl(gpr2, 2047); |
| masm.subl(gpr2, 1023); |
| masm.subl(gpr3, gpr2); |
| masm.addq(gpr8, gpr4); |
| masm.movl(gpr4, gpr3); |
| masm.addl(gpr4, 32); |
| masm.cmpl(gpr3, 1); |
| masm.jcc(ConditionFlag.Less, bb3); |
| |
| masm.negl(gpr3); |
| masm.addl(gpr3, 29); |
| masm.shll(gpr8); |
| masm.movl(gpr6, gpr8); |
| masm.andl(gpr8, 536870911); |
| masm.testl(gpr8, 268435456); |
| masm.jcc(ConditionFlag.NotEqual, bb4); |
| |
| masm.shrl(gpr8); |
| masm.movl(gpr2, 0); |
| masm.shlq(gpr8, 32); |
| masm.orq(gpr8, gpr10); |
| |
| masm.bind(bb5); |
| |
| masm.bind(bb6); |
| masm.cmpq(gpr8, 0); |
| masm.jcc(ConditionFlag.Equal, bb7); |
| |
| masm.bind(bb8); |
| masm.bsrq(gpr10, gpr8); |
| masm.movl(gpr3, 29); |
| masm.subl(gpr3, gpr10); |
| masm.jcc(ConditionFlag.LessEqual, bb9); |
| |
| masm.shlq(gpr8); |
| masm.movq(gpr1, gpr9); |
| masm.shlq(gpr9); |
| masm.addl(gpr4, gpr3); |
| masm.negl(gpr3); |
| masm.addl(gpr3, 64); |
| masm.shrq(gpr1); |
| masm.shrq(gpr7); |
| masm.orq(gpr8, gpr1); |
| masm.orq(gpr9, gpr7); |
| |
| masm.bind(bb10); |
| masm.cvtsi2sdq(dest, gpr8); |
| masm.shrq(gpr9, 1); |
| masm.cvtsi2sdq(temp3, gpr9); |
| masm.xorpd(temp4, temp4); |
| masm.shll(gpr4, 4); |
| masm.negl(gpr4); |
| masm.addl(gpr4, 16368); |
| masm.orl(gpr4, gpr5); |
| masm.xorl(gpr4, gpr2); |
| masm.pinsrw(temp4, gpr4, 3); |
| masm.leaq(gpr2, externalAddress(piFourPtr)); |
| masm.movdqu(temp2, new AMD64Address(gpr2, 0)); // 0x40000000, |
| // 0x3fe921fb, |
| // 0x18469899, |
| // 0x3e64442d |
| masm.xorpd(temp5, temp5); |
| masm.subl(gpr4, 1008); |
| masm.pinsrw(temp5, gpr4, 3); |
| masm.mulsd(dest, temp4); |
| masm.shll(gpr5, 16); |
| masm.sarl(gpr5, 31); |
| masm.mulsd(temp3, temp5); |
| masm.movdqu(temp1, dest); |
| masm.mulsd(dest, temp2); |
| masm.pshufd(temp6, temp2, 0xE); |
| masm.shrl(gpr6, 29); |
| masm.addsd(temp1, temp3); |
| masm.mulsd(temp3, temp2); |
| masm.addl(gpr6, gpr5); |
| masm.xorl(gpr6, gpr5); |
| masm.mulsd(temp6, temp1); |
| masm.movl(gpr1, gpr6); |
| masm.addsd(temp6, temp3); |
| masm.movdqu(temp2, dest); |
| masm.addsd(dest, temp6); |
| masm.subsd(temp2, dest); |
| masm.addsd(temp6, temp2); |
| |
| masm.bind(bb11); |
| masm.movq(temp1, externalAddress(piThirtyTwoInvPtr)); // 0x6dc9c883, |
| // 0x40245f30 |
| masm.mulsd(temp1, dest); |
| masm.movdq(temp5, externalAddress(oneHalfPtr)); // 0x00000000, |
| // 0x3fe00000, |
| // 0x00000000, |
| // 0x3fe00000 |
| masm.movdq(temp4, externalAddress(signMaskPtr)); // 0x00000000, |
| // 0x80000000 |
| masm.pand(temp4, dest); |
| masm.por(temp5, temp4); |
| masm.addpd(temp1, temp5); |
| masm.cvttsd2siq(gpr4, temp1); |
| masm.cvtsi2sdq(temp1, gpr4); |
| masm.movdq(temp3, externalAddress(pOnePtr)); // 0x54400000, |
| // 0x3fb921fb |
| masm.movdqu(temp2, externalAddress(pTwoPtr)); // 0x1a600000, |
| // 0x3d90b461, |
| // 0x1a600000, |
| // 0x3d90b461 |
| masm.mulsd(temp3, temp1); |
| masm.unpcklpd(temp1, temp1); |
| masm.shll(gpr1, 3); |
| masm.addl(gpr4, 1865232); |
| masm.movdqu(temp4, dest); |
| masm.addl(gpr4, gpr1); |
| masm.andl(gpr4, 63); |
| masm.movdqu(temp5, externalAddress(scFourPtr)); // 0xa556c734, |
| // 0x3ec71de3, |
| // 0x1a01a01a, |
| // 0x3efa01a0 |
| masm.leaq(gpr1, externalAddress(cTablePtr)); |
| masm.shll(gpr4, 5); |
| masm.addq(gpr1, gpr4); |
| masm.movdqu(temp8, new AMD64Address(gpr1, 0)); |
| masm.mulpd(temp2, temp1); |
| masm.subsd(dest, temp3); |
| masm.mulsd(temp1, externalAddress(pThreePtr)); // 0x2e037073, |
| // 0x3b63198a |
| masm.subsd(temp4, temp3); |
| masm.unpcklpd(dest, dest); |
| masm.movdqu(temp3, temp4); |
| masm.subsd(temp4, temp2); |
| masm.mulpd(temp5, dest); |
| masm.pshufd(temp7, temp8, 0xE); |
| masm.movdqu(temp9, temp7); |
| masm.subpd(dest, temp2); |
| masm.mulsd(temp7, temp4); |
| masm.subsd(temp3, temp4); |
| masm.mulpd(temp5, dest); |
| masm.mulpd(dest, dest); |
| masm.subsd(temp3, temp2); |
| masm.movdqu(temp2, temp8); |
| masm.subsd(temp1, temp3); |
| masm.movdq(temp3, new AMD64Address(gpr1, 24)); |
| masm.addsd(temp2, temp3); |
| masm.subsd(temp7, temp2); |
| masm.subsd(temp1, temp6); |
| masm.movdqu(temp6, externalAddress(scTwoPtr)); // 0x11111111, |
| // 0x3f811111, |
| // 0x55555555, |
| // 0x3fa55555 |
| masm.mulsd(temp2, temp4); |
| masm.mulpd(temp6, dest); |
| masm.mulsd(temp3, temp4); |
| masm.mulpd(temp2, dest); |
| masm.mulpd(dest, dest); |
| masm.addpd(temp5, externalAddress(scThreePtr)); // 0x1a01a01a, |
| // 0xbf2a01a0, |
| // 0x16c16c17, |
| // 0xbf56c16c |
| masm.mulsd(temp4, temp8); |
| masm.addpd(temp6, externalAddress(scOnePtr)); // 0x55555555, |
| // 0xbfc55555, |
| // 0x00000000, |
| // 0xbfe00000 |
| masm.mulpd(temp5, dest); |
| masm.movdqu(dest, temp3); |
| masm.addsd(temp3, temp9); |
| masm.mulpd(temp1, temp7); |
| masm.movdqu(temp7, temp4); |
| masm.addsd(temp4, temp3); |
| masm.addpd(temp6, temp5); |
| masm.subsd(temp9, temp3); |
| masm.subsd(temp3, temp4); |
| masm.addsd(temp1, new AMD64Address(gpr1, 16)); |
| masm.mulpd(temp6, temp2); |
| masm.addsd(temp9, dest); |
| masm.addsd(temp3, temp7); |
| masm.addsd(temp1, temp9); |
| masm.addsd(temp1, temp3); |
| masm.addsd(temp1, temp6); |
| masm.unpckhpd(temp6, temp6); |
| masm.movdqu(dest, temp4); |
| masm.addsd(temp1, temp6); |
| masm.addsd(dest, temp1); |
| masm.jmp(bb13); |
| |
| masm.bind(bb7); |
| masm.addl(gpr4, 64); |
| masm.movq(gpr8, gpr9); |
| masm.movq(gpr9, gpr7); |
| masm.movl(gpr7, 0); |
| masm.cmpq(gpr8, 0); |
| masm.jcc(ConditionFlag.NotEqual, bb8); |
| |
| masm.addl(gpr4, 64); |
| masm.movq(gpr8, gpr9); |
| masm.movq(gpr9, gpr7); |
| masm.cmpq(gpr8, 0); |
| masm.jcc(ConditionFlag.NotEqual, bb8); |
| |
| masm.xorpd(dest, dest); |
| masm.xorpd(temp6, temp6); |
| masm.jmp(bb11); |
| |
| masm.bind(bb9); |
| masm.jcc(ConditionFlag.Equal, bb10); |
| |
| masm.negl(gpr3); |
| masm.shrq(gpr9); |
| masm.movq(gpr1, gpr8); |
| masm.shrq(gpr8); |
| masm.subl(gpr4, gpr3); |
| masm.negl(gpr3); |
| masm.addl(gpr3, 64); |
| masm.shlq(gpr1); |
| masm.orq(gpr9, gpr1); |
| masm.jmp(bb10); |
| |
| masm.bind(bb3); |
| masm.negl(gpr3); |
| masm.shlq(gpr8, 32); |
| masm.orq(gpr8, gpr10); |
| masm.shlq(gpr8); |
| masm.movq(gpr6, gpr8); |
| masm.testl(gpr8, Integer.MIN_VALUE); |
| masm.jcc(ConditionFlag.NotEqual, bb12); |
| |
| masm.shrl(gpr8); |
| masm.movl(gpr2, 0); |
| masm.shrq(gpr6, 3); |
| masm.jmp(bb6); |
| |
| masm.bind(bb4); |
| masm.shrl(gpr8); |
| masm.movl(gpr2, 536870912); |
| masm.shrl(gpr2); |
| masm.shlq(gpr8, 32); |
| masm.orq(gpr8, gpr10); |
| masm.shlq(gpr2, 32); |
| masm.addl(gpr6, 536870912); |
| masm.movl(gpr3, 0); |
| masm.movl(gpr10, 0); |
| masm.subq(gpr3, gpr7); |
| masm.sbbq(gpr10, gpr9); |
| masm.sbbq(gpr2, gpr8); |
| masm.movq(gpr7, gpr3); |
| masm.movq(gpr9, gpr10); |
| masm.movq(gpr8, gpr2); |
| masm.movl(gpr2, 32768); |
| masm.jmp(bb5); |
| |
| masm.bind(bb12); |
| masm.shrl(gpr8); |
| masm.movq(gpr2, 0x100000000L); |
| masm.shrq(gpr2); |
| masm.movl(gpr3, 0); |
| masm.movl(gpr10, 0); |
| masm.subq(gpr3, gpr7); |
| masm.sbbq(gpr10, gpr9); |
| masm.sbbq(gpr2, gpr8); |
| masm.movq(gpr7, gpr3); |
| masm.movq(gpr9, gpr10); |
| masm.movq(gpr8, gpr2); |
| masm.movl(gpr2, 32768); |
| masm.shrq(gpr6, 3); |
| masm.addl(gpr6, 536870912); |
| masm.jmp(bb6); |
| |
| masm.bind(bb13); |
| } |
| |
| /* |
| * Copyright (c) 2014, 2016, Intel Corporation. All rights reserved. Intel Math Library (LIBM) |
| * Source Code |
| * |
| * ALGORITHM DESCRIPTION - TAN() --------------------- |
| * |
| * Polynomials coefficients and other constants. |
| * |
| * Note that in this algorithm, there is a different polynomial for each breakpoint, so there |
| * are 32 sets of polynomial coefficients as well as 32 instances of the other constants. |
| * |
| * The polynomial coefficients and constants are offset from the start of the main block as |
| * follows: |
| * |
| * 0: c8 | c0 16: c9 | c1 32: c10 | c2 48: c11 | c3 64: c12 | c4 80: c13 | c5 96: c14 | c6 112: |
| * c15 | c7 128: T_hi 136: T_lo 144: Sigma 152: T_hl 160: Tau 168: Mask 176: (end of block) |
| * |
| * The total table size is therefore 5632 bytes. |
| * |
| * Note that c0 and c1 are always zero. We could try storing other constants here, and just |
| * loading the low part of the SIMD register in these cases, after ensuring the high part is |
| * zero. |
| * |
| * The higher terms of the polynomial are computed in the *low* part of the SIMD register. This |
| * is so we can overlap the multiplication by r^8 and the unpacking of the other part. |
| * |
| * The constants are: T_hi + T_lo = accurate constant term in power series Sigma + T_hl = |
| * accurate coefficient of r in power series (Sigma=1 bit) Tau = multiplier for the reciprocal, |
| * always -1 or 0 |
| * |
| * The basic reconstruction formula using these constants is: |
| * |
| * High = tau * recip_hi + t_hi Med = (sgn * r + t_hl * r)_hi Low = (sgn * r + t_hl * r)_lo + |
| * tau * recip_lo + T_lo + (T_hl + sigma) * c + pol |
| * |
| * where pol = c0 + c1 * r + c2 * r^2 + ... + c15 * r^15 |
| * |
| * (c0 = c1 = 0, but using them keeps SIMD regularity) |
| * |
| * We then do a compensated sum High + Med, add the low parts together and then do the final |
| * sum. |
| * |
| * Here recip_hi + recip_lo is an accurate reciprocal of the remainder modulo pi/2 |
| * |
| * Special cases: tan(NaN) = quiet NaN, and raise invalid exception tan(INF) = NaN and raise |
| * invalid exception tan(+/-0) = +/-0 |
| * |
| */ |
| |
| private static int[] oneHalfTan = { |
| 0x00000000, 0x3fe00000, 0x00000000, 0x3fe00000 |
| }; |
| |
| private static int[] mulSixteen = { |
| 0x00000000, 0x40300000, 0x00000000, 0x3ff00000 |
| }; |
| |
| private static int[] signMaskTan = { |
| 0x00000000, 0x80000000, 0x00000000, 0x80000000 |
| }; |
| |
| private static int[] piThirtyTwoInvTan = { |
| 0x6dc9c883, 0x3fe45f30, 0x6dc9c883, 0x40245f30 |
| }; |
| |
| private static int[] pOneTan = { |
| 0x54444000, 0x3fb921fb, 0x54440000, 0x3fb921fb |
| }; |
| |
| private static int[] pTwoTan = { |
| 0x67674000, 0xbd32e7b9, 0x4c4c0000, 0x3d468c23 |
| }; |
| |
| private static int[] pThreeTan = { |
| 0x3707344a, 0x3aa8a2e0, 0x03707345, 0x3ae98a2e |
| }; |
| |
| private static int[] cTableTan = { |
| 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x882c10fa, |
| 0x3f9664f4, 0x00000000, 0x00000000, 0x00000000, 0x00000000, |
| 0x00000000, 0x00000000, 0x55e6c23d, 0x3f8226e3, 0x55555555, |
| 0x3fd55555, 0x00000000, 0x00000000, 0x00000000, 0x00000000, |
| 0x0e157de0, 0x3f6d6d3d, 0x11111111, 0x3fc11111, 0x00000000, |
| 0x00000000, 0x00000000, 0x00000000, 0x452b75e3, 0x3f57da36, |
| 0x1ba1ba1c, 0x3faba1ba, 0x00000000, 0x00000000, 0x00000000, |
| 0x00000000, 0x00000000, 0x3ff00000, 0x00000000, 0x00000000, |
| 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x4e435f9b, |
| 0x3f953f83, 0x00000000, 0x00000000, 0x3c6e8e46, 0x3f9b74ea, |
| 0x00000000, 0x00000000, 0xda5b7511, 0x3f85ad63, 0xdc230b9b, |
| 0x3fb97558, 0x26cb3788, 0x3f881308, 0x76fc4985, 0x3fd62ac9, |
| 0x77bb08ba, 0x3f757c85, 0xb6247521, 0x3fb1381e, 0x5922170c, |
| 0x3f754e95, 0x8746482d, 0x3fc27f83, 0x11055b30, 0x3f64e391, |
| 0x3e666320, 0x3fa3e609, 0x0de9dae3, 0x3f6301df, 0x1f1dca06, |
| 0x3fafa8ae, 0x8c5b2da2, 0x3fb936bb, 0x4e88f7a5, 0x3c587d05, |
| 0x00000000, 0x3ff00000, 0xa8935dd9, 0x3f83dde2, 0x00000000, |
| 0x00000000, 0x00000000, 0x00000000, 0x5a279ea3, 0x3faa3407, |
| 0x00000000, 0x00000000, 0x432d65fa, 0x3fa70153, 0x00000000, |
| 0x00000000, 0x891a4602, 0x3f9d03ef, 0xd62ca5f8, 0x3fca77d9, |
| 0xb35f4628, 0x3f97a265, 0x433258fa, 0x3fd8cf51, 0xb58fd909, |
| 0x3f8f88e3, 0x01771cea, 0x3fc2b154, 0xf3562f8e, 0x3f888f57, |
| 0xc028a723, 0x3fc7370f, 0x20b7f9f0, 0x3f80f44c, 0x214368e9, |
| 0x3fb6dfaa, 0x28891863, 0x3f79b4b6, 0x172dbbf0, 0x3fb6cb8e, |
| 0xe0553158, 0x3fc975f5, 0x593fe814, 0x3c2ef5d3, 0x00000000, |
| 0x3ff00000, 0x03dec550, 0x3fa44203, 0x00000000, 0x00000000, |
| 0x00000000, 0x00000000, 0x9314533e, 0x3fbb8ec5, 0x00000000, |
| 0x00000000, 0x09aa36d0, 0x3fb6d3f4, 0x00000000, 0x00000000, |
| 0xdcb427fd, 0x3fb13950, 0xd87ab0bb, 0x3fd5335e, 0xce0ae8a5, |
| 0x3fabb382, 0x79143126, 0x3fddba41, 0x5f2b28d4, 0x3fa552f1, |
| 0x59f21a6d, 0x3fd015ab, 0x22c27d95, 0x3fa0e984, 0xe19fc6aa, |
| 0x3fd0576c, 0x8f2c2950, 0x3f9a4898, 0xc0b3f22c, 0x3fc59462, |
| 0x1883a4b8, 0x3f94b61c, 0x3f838640, 0x3fc30eb8, 0x355c63dc, |
| 0x3fd36a08, 0x1dce993d, 0xbc6d704d, 0x00000000, 0x3ff00000, |
| 0x2b82ab63, 0x3fb78e92, 0x00000000, 0x00000000, 0x00000000, |
| 0x00000000, 0x56f37042, 0x3fccfc56, 0x00000000, 0x00000000, |
| 0xaa563951, 0x3fc90125, 0x00000000, 0x00000000, 0x3d0e7c5d, |
| 0x3fc50533, 0x9bed9b2e, 0x3fdf0ed9, 0x5fe7c47c, 0x3fc1f250, |
| 0x96c125e5, 0x3fe2edd9, 0x5a02bbd8, 0x3fbe5c71, 0x86362c20, |
| 0x3fda08b7, 0x4b4435ed, 0x3fb9d342, 0x4b494091, 0x3fd911bd, |
| 0xb56658be, 0x3fb5e4c7, 0x93a2fd76, 0x3fd3c092, 0xda271794, |
| 0x3fb29910, 0x3303df2b, 0x3fd189be, 0x99fcef32, 0x3fda8279, |
| 0xb68c1467, 0x3c708b2f, 0x00000000, 0x3ff00000, 0x980c4337, |
| 0x3fc5f619, 0x00000000, 0x00000000, 0x00000000, 0x00000000, |
| 0xcc03e501, 0x3fdff10f, 0x00000000, 0x00000000, 0x44a4e845, |
| 0x3fddb63b, 0x00000000, 0x00000000, 0x3768ad9f, 0x3fdb72a4, |
| 0x3dd01cca, 0x3fe5fdb9, 0xa61d2811, 0x3fd972b2, 0x5645ad0b, |
| 0x3fe977f9, 0xd013b3ab, 0x3fd78ca3, 0xbf0bf914, 0x3fe4f192, |
| 0x4d53e730, 0x3fd5d060, 0x3f8b9000, 0x3fe49933, 0xe2b82f08, |
| 0x3fd4322a, 0x5936a835, 0x3fe27ae1, 0xb1c61c9b, 0x3fd2b3fb, |
| 0xef478605, 0x3fe1659e, 0x190834ec, 0x3fe11ab7, 0xcdb625ea, |
| 0xbc8e564b, 0x00000000, 0x3ff00000, 0xb07217e3, 0x3fd248f1, |
| 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x2b2c49d0, |
| 0x3ff2de9c, 0x00000000, 0x00000000, 0x2655bc98, 0x3ff33e58, |
| 0x00000000, 0x00000000, 0xff691fa2, 0x3ff3972e, 0xe93463bd, |
| 0x3feeed87, 0x070e10a0, 0x3ff3f5b2, 0xf4d790a4, 0x3ff20c10, |
| 0xa04e8ea3, 0x3ff4541a, 0x386accd3, 0x3ff1369e, 0x222a66dd, |
| 0x3ff4b521, 0x22a9777e, 0x3ff20817, 0x52a04a6e, 0x3ff5178f, |
| 0xddaa0031, 0x3ff22137, 0x4447d47c, 0x3ff57c01, 0x1e9c7f1d, |
| 0x3ff29311, 0x2ab7f990, 0x3fe561b8, 0x209c7df1, 0x3c87a8c5, |
| 0x00000000, 0x3ff00000, 0x4170bcc6, 0x3fdc92d8, 0x00000000, |
| 0x00000000, 0x00000000, 0x00000000, 0xc7ab4d5a, 0x40085e24, |
| 0x00000000, 0x00000000, 0xe93ea75d, 0x400b963d, 0x00000000, |
| 0x00000000, 0x94a7f25a, 0x400f37e2, 0x4b6261cb, 0x3ff5f984, |
| 0x5a9dd812, 0x4011aab0, 0x74c30018, 0x3ffaf5a5, 0x7f2ce8e3, |
| 0x4013fe8b, 0xfe8e54fa, 0x3ffd7334, 0x670d618d, 0x4016a10c, |
| 0x4db97058, 0x4000e012, 0x24df44dd, 0x40199c5f, 0x697d6ece, |
| 0x4003006e, 0x83298b82, 0x401cfc4d, 0x19d490d6, 0x40058c19, |
| 0x2ae42850, 0x3fea4300, 0x118e20e6, 0xbc7a6db8, 0x00000000, |
| 0x40000000, 0xe33345b8, 0xbfd4e526, 0x00000000, 0x00000000, |
| 0x00000000, 0x00000000, 0x65965966, 0x40219659, 0x00000000, |
| 0x00000000, 0x882c10fa, 0x402664f4, 0x00000000, 0x00000000, |
| 0x83cd3723, 0x402c8342, 0x00000000, 0x40000000, 0x55e6c23d, |
| 0x403226e3, 0x55555555, 0x40055555, 0x34451939, 0x40371c96, |
| 0xaaaaaaab, 0x400aaaaa, 0x0e157de0, 0x403d6d3d, 0x11111111, |
| 0x40111111, 0xa738201f, 0x4042bbce, 0x05b05b06, 0x4015b05b, |
| 0x452b75e3, 0x4047da36, 0x1ba1ba1c, 0x401ba1ba, 0x00000000, |
| 0x3ff00000, 0x00000000, 0x00000000, 0x00000000, 0x40000000, |
| 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, |
| 0x00000000, 0x4f48b8d3, 0xbf33eaf9, 0x00000000, 0x00000000, |
| 0x0cf7586f, 0x3f20b8ea, 0x00000000, 0x00000000, 0xd0258911, |
| 0xbf0abaf3, 0x23e49fe9, 0xbfab5a8c, 0x2d53222e, 0x3ef60d15, |
| 0x21169451, 0x3fa172b2, 0xbb254dbc, 0xbee1d3b5, 0xdbf93b8e, |
| 0xbf84c7db, 0x05b4630b, 0x3ecd3364, 0xee9aada7, 0x3f743924, |
| 0x794a8297, 0xbeb7b7b9, 0xe015f797, 0xbf5d41f5, 0xe41a4a56, |
| 0x3ea35dfb, 0xe4c2a251, 0x3f49a2ab, 0x5af9e000, 0xbfce49ce, |
| 0x8c743719, 0x3d1eb860, 0x00000000, 0x00000000, 0x1b4863cf, |
| 0x3fd78294, 0x00000000, 0x3ff00000, 0x00000000, 0xfffffff8, |
| 0x535ad890, 0xbf2b9320, 0x00000000, 0x00000000, 0x018fdf1f, |
| 0x3f16d61d, 0x00000000, 0x00000000, 0x0359f1be, 0xbf0139e4, |
| 0xa4317c6d, 0xbfa67e17, 0x82672d0f, 0x3eebb405, 0x2f1b621e, |
| 0x3f9f455b, 0x51ccf238, 0xbed55317, 0xf437b9ac, 0xbf804bee, |
| 0xc791a2b5, 0x3ec0e993, 0x919a1db2, 0x3f7080c2, 0x336a5b0e, |
| 0xbeaa48a2, 0x0a268358, 0xbf55a443, 0xdfd978e4, 0x3e94b61f, |
| 0xd7767a58, 0x3f431806, 0x2aea0000, 0xbfc9bbe8, 0x7723ea61, |
| 0xbd3a2369, 0x00000000, 0x00000000, 0xdf7796ff, 0x3fd6e642, |
| 0x00000000, 0x3ff00000, 0x00000000, 0xfffffff8, 0xb9ff07ce, |
| 0xbf231c78, 0x00000000, 0x00000000, 0xa5517182, 0x3f0ff0e0, |
| 0x00000000, 0x00000000, 0x790b4cbc, 0xbef66191, 0x848a46c6, |
| 0xbfa21ac0, 0xb16435fa, 0x3ee1d3ec, 0x2a1aa832, 0x3f9c71ea, |
| 0xfdd299ef, 0xbec9dd1a, 0x3f8dbaaf, 0xbf793363, 0x309fc6ea, |
| 0x3eb415d6, 0xbee60471, 0x3f6b83ba, 0x94a0a697, 0xbe9dae11, |
| 0x3e5c67b3, 0xbf4fd07b, 0x9a8f3e3e, 0x3e86bd75, 0xa4beb7a4, |
| 0x3f3d1eb1, 0x29cfc000, 0xbfc549ce, 0xbf159358, 0xbd397b33, |
| 0x00000000, 0x00000000, 0x871fee6c, 0x3fd666f0, 0x00000000, |
| 0x3ff00000, 0x00000000, 0xfffffff8, 0x7d98a556, 0xbf1a3958, |
| 0x00000000, 0x00000000, 0x9d88dc01, 0x3f0704c2, 0x00000000, |
| 0x00000000, 0x73742a2b, 0xbeed054a, 0x58844587, 0xbf9c2a13, |
| 0x55688a79, 0x3ed7a326, 0xee33f1d6, 0x3f9a48f4, 0xa8dc9888, |
| 0xbebf8939, 0xaad4b5b8, 0xbf72f746, 0x9102efa1, 0x3ea88f82, |
| 0xdabc29cf, 0x3f678228, 0x9289afb8, 0xbe90f456, 0x741fb4ed, |
| 0xbf46f3a3, 0xa97f6663, 0x3e79b4bf, 0xca89ff3f, 0x3f36db70, |
| 0xa8a2a000, 0xbfc0ee13, 0x3da24be1, 0xbd338b9f, 0x00000000, |
| 0x00000000, 0x11cd6c69, 0x3fd601fd, 0x00000000, 0x3ff00000, |
| 0x00000000, 0xfffffff8, 0x1a154b97, 0xbf116b01, 0x00000000, |
| 0x00000000, 0x2d427630, 0x3f0147bf, 0x00000000, 0x00000000, |
| 0xb93820c8, 0xbee264d4, 0xbb6cbb18, 0xbf94ab8c, 0x888d4d92, |
| 0x3ed0568b, 0x60730f7c, 0x3f98b19b, 0xe4b1fb11, 0xbeb2f950, |
| 0x22cf9f74, 0xbf6b21cd, 0x4a3ff0a6, 0x3e9f499e, 0xfd2b83ce, |
| 0x3f64aad7, 0x637b73af, 0xbe83487c, 0xe522591a, 0xbf3fc092, |
| 0xa158e8bc, 0x3e6e3aae, 0xe5e82ffa, 0x3f329d2f, 0xd636a000, |
| 0xbfb9477f, 0xc2c2d2bc, 0xbd135ef9, 0x00000000, 0x00000000, |
| 0xf2fdb123, 0x3fd5b566, 0x00000000, 0x3ff00000, 0x00000000, |
| 0xfffffff8, 0xc41acb64, 0xbf05448d, 0x00000000, 0x00000000, |
| 0xdbb03d6f, 0x3efb7ad2, 0x00000000, 0x00000000, 0x9e42962d, |
| 0xbed5aea5, 0x2579f8ef, 0xbf8b2398, 0x288a1ed9, 0x3ec81441, |
| 0xb0198dc5, 0x3f979a3a, 0x2fdfe253, 0xbea57cd3, 0x5766336f, |
| 0xbf617caa, 0x600944c3, 0x3e954ed6, 0xa4e0aaf8, 0x3f62c646, |
| 0x6b8fb29c, 0xbe74e3a3, 0xdc4c0409, 0xbf33f952, 0x9bffe365, |
| 0x3e6301ec, 0xb8869e44, 0x3f2fc566, 0xe1e04000, 0xbfb0cc62, |
| 0x016b907f, 0xbd119cbc, 0x00000000, 0x00000000, 0xe6b9d8fa, |
| 0x3fd57fb3, 0x00000000, 0x3ff00000, 0x00000000, 0xfffffff8, |
| 0x5daf22a6, 0xbef429d7, 0x00000000, 0x00000000, 0x06bca545, |
| 0x3ef7a27d, 0x00000000, 0x00000000, 0x7211c19a, 0xbec41c3e, |
| 0x956ed53e, 0xbf7ae3f4, 0xee750e72, 0x3ec3901b, 0x91d443f5, |
| 0x3f96f713, 0x36661e6c, 0xbe936e09, 0x506f9381, 0xbf5122e8, |
| 0xcb6dd43f, 0x3e9041b9, 0x6698b2ff, 0x3f61b0c7, 0x576bf12b, |
| 0xbe625a8a, 0xe5a0e9dc, 0xbf23499d, 0x110384dd, 0x3e5b1c2c, |
| 0x68d43db6, 0x3f2cb899, 0x6ecac000, 0xbfa0c414, 0xcd7dd58c, |
| 0x3d13500f, 0x00000000, 0x00000000, 0x85a2c8fb, 0x3fd55fe0, |
| 0x00000000, 0x3ff00000, 0x00000000, 0xfffffff8, 0x00000000, |
| 0x00000000, 0x00000000, 0x00000000, 0x2bf70ebe, 0x3ef66a8f, |
| 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, |
| 0x00000000, 0xd644267f, 0x3ec22805, 0x16c16c17, 0x3f96c16c, |
| 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xc4e09162, |
| 0x3e8d6db2, 0xbc011567, 0x3f61566a, 0x00000000, 0x00000000, |
| 0x00000000, 0x00000000, 0x1f79955c, 0x3e57da4e, 0x9334ef0b, |
| 0x3f2bbd77, 0x00000000, 0x00000000, 0x00000000, 0x00000000, |
| 0x00000000, 0x00000000, 0x55555555, 0x3fd55555, 0x00000000, |
| 0x3ff00000, 0x00000000, 0xfffffff8, 0x5daf22a6, 0x3ef429d7, |
| 0x00000000, 0x00000000, 0x06bca545, 0x3ef7a27d, 0x00000000, |
| 0x00000000, 0x7211c19a, 0x3ec41c3e, 0x956ed53e, 0x3f7ae3f4, |
| 0xee750e72, 0x3ec3901b, 0x91d443f5, 0x3f96f713, 0x36661e6c, |
| 0x3e936e09, 0x506f9381, 0x3f5122e8, 0xcb6dd43f, 0x3e9041b9, |
| 0x6698b2ff, 0x3f61b0c7, 0x576bf12b, 0x3e625a8a, 0xe5a0e9dc, |
| 0x3f23499d, 0x110384dd, 0x3e5b1c2c, 0x68d43db6, 0x3f2cb899, |
| 0x6ecac000, 0x3fa0c414, 0xcd7dd58c, 0xbd13500f, 0x00000000, |
| 0x00000000, 0x85a2c8fb, 0x3fd55fe0, 0x00000000, 0x3ff00000, |
| 0x00000000, 0xfffffff8, 0xc41acb64, 0x3f05448d, 0x00000000, |
| 0x00000000, 0xdbb03d6f, 0x3efb7ad2, 0x00000000, 0x00000000, |
| 0x9e42962d, 0x3ed5aea5, 0x2579f8ef, 0x3f8b2398, 0x288a1ed9, |
| 0x3ec81441, 0xb0198dc5, 0x3f979a3a, 0x2fdfe253, 0x3ea57cd3, |
| 0x5766336f, 0x3f617caa, 0x600944c3, 0x3e954ed6, 0xa4e0aaf8, |
| 0x3f62c646, 0x6b8fb29c, 0x3e74e3a3, 0xdc4c0409, 0x3f33f952, |
| 0x9bffe365, 0x3e6301ec, 0xb8869e44, 0x3f2fc566, 0xe1e04000, |
| 0x3fb0cc62, 0x016b907f, 0x3d119cbc, 0x00000000, 0x00000000, |
| 0xe6b9d8fa, 0x3fd57fb3, 0x00000000, 0x3ff00000, 0x00000000, |
| 0xfffffff8, 0x1a154b97, 0x3f116b01, 0x00000000, 0x00000000, |
| 0x2d427630, 0x3f0147bf, 0x00000000, 0x00000000, 0xb93820c8, |
| 0x3ee264d4, 0xbb6cbb18, 0x3f94ab8c, 0x888d4d92, 0x3ed0568b, |
| 0x60730f7c, 0x3f98b19b, 0xe4b1fb11, 0x3eb2f950, 0x22cf9f74, |
| 0x3f6b21cd, 0x4a3ff0a6, 0x3e9f499e, 0xfd2b83ce, 0x3f64aad7, |
| 0x637b73af, 0x3e83487c, 0xe522591a, 0x3f3fc092, 0xa158e8bc, |
| 0x3e6e3aae, 0xe5e82ffa, 0x3f329d2f, 0xd636a000, 0x3fb9477f, |
| 0xc2c2d2bc, 0x3d135ef9, 0x00000000, 0x00000000, 0xf2fdb123, |
| 0x3fd5b566, 0x00000000, 0x3ff00000, 0x00000000, 0xfffffff8, |
| 0x7d98a556, 0x3f1a3958, 0x00000000, 0x00000000, 0x9d88dc01, |
| 0x3f0704c2, 0x00000000, 0x00000000, 0x73742a2b, 0x3eed054a, |
| 0x58844587, 0x3f9c2a13, 0x55688a79, 0x3ed7a326, 0xee33f1d6, |
| 0x3f9a48f4, 0xa8dc9888, 0x3ebf8939, 0xaad4b5b8, 0x3f72f746, |
| 0x9102efa1, 0x3ea88f82, 0xdabc29cf, 0x3f678228, 0x9289afb8, |
| 0x3e90f456, 0x741fb4ed, 0x3f46f3a3, 0xa97f6663, 0x3e79b4bf, |
| 0xca89ff3f, 0x3f36db70, 0xa8a2a000, 0x3fc0ee13, 0x3da24be1, |
| 0x3d338b9f, 0x00000000, 0x00000000, 0x11cd6c69, 0x3fd601fd, |
| 0x00000000, 0x3ff00000, 0x00000000, 0xfffffff8, 0xb9ff07ce, |
| 0x3f231c78, 0x00000000, 0x00000000, 0xa5517182, 0x3f0ff0e0, |
| 0x00000000, 0x00000000, 0x790b4cbc, 0x3ef66191, 0x848a46c6, |
| 0x3fa21ac0, 0xb16435fa, 0x3ee1d3ec, 0x2a1aa832, 0x3f9c71ea, |
| 0xfdd299ef, 0x3ec9dd1a, 0x3f8dbaaf, 0x3f793363, 0x309fc6ea, |
| 0x3eb415d6, 0xbee60471, 0x3f6b83ba, 0x94a0a697, 0x3e9dae11, |
| 0x3e5c67b3, 0x3f4fd07b, 0x9a8f3e3e, 0x3e86bd75, 0xa4beb7a4, |
| 0x3f3d1eb1, 0x29cfc000, 0x3fc549ce, 0xbf159358, 0x3d397b33, |
| 0x00000000, 0x00000000, 0x871fee6c, 0x3fd666f0, 0x00000000, |
| 0x3ff00000, 0x00000000, 0xfffffff8, 0x535ad890, 0x3f2b9320, |
| 0x00000000, 0x00000000, 0x018fdf1f, 0x3f16d61d, 0x00000000, |
| 0x00000000, 0x0359f1be, 0x3f0139e4, 0xa4317c6d, 0x3fa67e17, |
| 0x82672d0f, 0x3eebb405, 0x2f1b621e, 0x3f9f455b, 0x51ccf238, |
| 0x3ed55317, 0xf437b9ac, 0x3f804bee, 0xc791a2b5, 0x3ec0e993, |
| 0x919a1db2, 0x3f7080c2, 0x336a5b0e, 0x3eaa48a2, 0x0a268358, |
| 0x3f55a443, 0xdfd978e4, 0x3e94b61f, 0xd7767a58, 0x3f431806, |
| 0x2aea0000, 0x3fc9bbe8, 0x7723ea61, 0x3d3a2369, 0x00000000, |
| 0x00000000, 0xdf7796ff, 0x3fd6e642, 0x00000000, 0x3ff00000, |
| 0x00000000, 0xfffffff8, 0x4f48b8d3, 0x3f33eaf9, 0x00000000, |
| 0x00000000, 0x0cf7586f, 0x3f20b8ea, 0x00000000, 0x00000000, |
| 0xd0258911, 0x3f0abaf3, 0x23e49fe9, 0x3fab5a8c, 0x2d53222e, |
| 0x3ef60d15, 0x21169451, 0x3fa172b2, 0xbb254dbc, 0x3ee1d3b5, |
| 0xdbf93b8e, 0x3f84c7db, 0x05b4630b, 0x3ecd3364, 0xee9aada7, |
| 0x3f743924, 0x794a8297, 0x3eb7b7b9, 0xe015f797, 0x3f5d41f5, |
| 0xe41a4a56, 0x3ea35dfb, 0xe4c2a251, 0x3f49a2ab, 0x5af9e000, |
| 0x3fce49ce, 0x8c743719, 0xbd1eb860, 0x00000000, 0x00000000, |
| 0x1b4863cf, 0x3fd78294, 0x00000000, 0x3ff00000, 0x00000000, |
| 0xfffffff8, 0x65965966, 0xc0219659, 0x00000000, 0x00000000, |
| 0x882c10fa, 0x402664f4, 0x00000000, 0x00000000, 0x83cd3723, |
| 0xc02c8342, 0x00000000, 0xc0000000, 0x55e6c23d, 0x403226e3, |
| 0x55555555, 0x40055555, 0x34451939, 0xc0371c96, 0xaaaaaaab, |
| 0xc00aaaaa, 0x0e157de0, 0x403d6d3d, 0x11111111, 0x40111111, |
| 0xa738201f, 0xc042bbce, 0x05b05b06, 0xc015b05b, 0x452b75e3, |
| 0x4047da36, 0x1ba1ba1c, 0x401ba1ba, 0x00000000, 0xbff00000, |
| 0x00000000, 0x00000000, 0x00000000, 0x40000000, 0x00000000, |
| 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, |
| 0xc7ab4d5a, 0xc0085e24, 0x00000000, 0x00000000, 0xe93ea75d, |
| 0x400b963d, 0x00000000, 0x00000000, 0x94a7f25a, 0xc00f37e2, |
| 0x4b6261cb, 0xbff5f984, 0x5a9dd812, 0x4011aab0, 0x74c30018, |
| 0x3ffaf5a5, 0x7f2ce8e3, 0xc013fe8b, 0xfe8e54fa, 0xbffd7334, |
| 0x670d618d, 0x4016a10c, 0x4db97058, 0x4000e012, 0x24df44dd, |
| 0xc0199c5f, 0x697d6ece, 0xc003006e, 0x83298b82, 0x401cfc4d, |
| 0x19d490d6, 0x40058c19, 0x2ae42850, 0xbfea4300, 0x118e20e6, |
| 0x3c7a6db8, 0x00000000, 0x40000000, 0xe33345b8, 0xbfd4e526, |
| 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x2b2c49d0, |
| 0xbff2de9c, 0x00000000, 0x00000000, 0x2655bc98, 0x3ff33e58, |
| 0x00000000, 0x00000000, 0xff691fa2, 0xbff3972e, 0xe93463bd, |
| 0xbfeeed87, 0x070e10a0, 0x3ff3f5b2, 0xf4d790a4, 0x3ff20c10, |
| 0xa04e8ea3, 0xbff4541a, 0x386accd3, 0xbff1369e, 0x222a66dd, |
| 0x3ff4b521, 0x22a9777e, 0x3ff20817, 0x52a04a6e, 0xbff5178f, |
| 0xddaa0031, 0xbff22137, 0x4447d47c, 0x3ff57c01, 0x1e9c7f1d, |
| 0x3ff29311, 0x2ab7f990, 0xbfe561b8, 0x209c7df1, 0xbc87a8c5, |
| 0x00000000, 0x3ff00000, 0x4170bcc6, 0x3fdc92d8, 0x00000000, |
| 0x00000000, 0x00000000, 0x00000000, 0xcc03e501, 0xbfdff10f, |
| 0x00000000, 0x00000000, 0x44a4e845, 0x3fddb63b, 0x00000000, |
| 0x00000000, 0x3768ad9f, 0xbfdb72a4, 0x3dd01cca, 0xbfe5fdb9, |
| 0xa61d2811, 0x3fd972b2, 0x5645ad0b, 0x3fe977f9, 0xd013b3ab, |
| 0xbfd78ca3, 0xbf0bf914, 0xbfe4f192, 0x4d53e730, 0x3fd5d060, |
| 0x3f8b9000, 0x3fe49933, 0xe2b82f08, 0xbfd4322a, 0x5936a835, |
| 0xbfe27ae1, 0xb1c61c9b, 0x3fd2b3fb, 0xef478605, 0x3fe1659e, |
| 0x190834ec, 0xbfe11ab7, 0xcdb625ea, 0x3c8e564b, 0x00000000, |
| 0x3ff00000, 0xb07217e3, 0x3fd248f1, 0x00000000, 0x00000000, |
| 0x00000000, 0x00000000, 0x56f37042, 0xbfccfc56, 0x00000000, |
| 0x00000000, 0xaa563951, 0x3fc90125, 0x00000000, 0x00000000, |
| 0x3d0e7c5d, 0xbfc50533, 0x9bed9b2e, 0xbfdf0ed9, 0x5fe7c47c, |
| 0x3fc1f250, 0x96c125e5, 0x3fe2edd9, 0x5a02bbd8, 0xbfbe5c71, |
| 0x86362c20, 0xbfda08b7, 0x4b4435ed, 0x3fb9d342, 0x4b494091, |
| 0x3fd911bd, 0xb56658be, 0xbfb5e4c7, 0x93a2fd76, 0xbfd3c092, |
| 0xda271794, 0x3fb29910, 0x3303df2b, 0x3fd189be, 0x99fcef32, |
| 0xbfda8279, 0xb68c1467, 0xbc708b2f, 0x00000000, 0x3ff00000, |
| 0x980c4337, 0x3fc5f619, 0x00000000, 0x00000000, 0x00000000, |
| 0x00000000, 0x9314533e, 0xbfbb8ec5, 0x00000000, 0x00000000, |
| 0x09aa36d0, 0x3fb6d3f4, 0x00000000, 0x00000000, 0xdcb427fd, |
| 0xbfb13950, 0xd87ab0bb, 0xbfd5335e, 0xce0ae8a5, 0x3fabb382, |
| 0x79143126, 0x3fddba41, 0x5f2b28d4, 0xbfa552f1, 0x59f21a6d, |
| 0xbfd015ab, 0x22c27d95, 0x3fa0e984, 0xe19fc6aa, 0x3fd0576c, |
| 0x8f2c2950, 0xbf9a4898, 0xc0b3f22c, 0xbfc59462, 0x1883a4b8, |
| 0x3f94b61c, 0x3f838640, 0x3fc30eb8, 0x355c63dc, 0xbfd36a08, |
| 0x1dce993d, 0x3c6d704d, 0x00000000, 0x3ff00000, 0x2b82ab63, |
| 0x3fb78e92, 0x00000000, 0x00000000, 0x00000000, 0x00000000, |
| 0x5a279ea3, 0xbfaa3407, 0x00000000, 0x00000000, 0x432d65fa, |
| 0x3fa70153, 0x00000000, 0x00000000, 0x891a4602, 0xbf9d03ef, |
| 0xd62ca5f8, 0xbfca77d9, 0xb35f4628, 0x3f97a265, 0x433258fa, |
| 0x3fd8cf51, 0xb58fd909, 0xbf8f88e3, 0x01771cea, 0xbfc2b154, |
| 0xf3562f8e, 0x3f888f57, 0xc028a723, 0x3fc7370f, 0x20b7f9f0, |
| 0xbf80f44c, 0x214368e9, 0xbfb6dfaa, 0x28891863, 0x3f79b4b6, |
| 0x172dbbf0, 0x3fb6cb8e, 0xe0553158, 0xbfc975f5, 0x593fe814, |
| 0xbc2ef5d3, 0x00000000, 0x3ff00000, 0x03dec550, 0x3fa44203, |
| 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x4e435f9b, |
| 0xbf953f83, 0x00000000, 0x00000000, 0x3c6e8e46, 0x3f9b74ea, |
| 0x00000000, 0x00000000, 0xda5b7511, 0xbf85ad63, 0xdc230b9b, |
| 0xbfb97558, 0x26cb3788, 0x3f881308, 0x76fc4985, 0x3fd62ac9, |
| 0x77bb08ba, 0xbf757c85, 0xb6247521, 0xbfb1381e, 0x5922170c, |
| 0x3f754e95, 0x8746482d, 0x3fc27f83, 0x11055b30, 0xbf64e391, |
| 0x3e666320, 0xbfa3e609, 0x0de9dae3, 0x3f6301df, 0x1f1dca06, |
| 0x3fafa8ae, 0x8c5b2da2, 0xbfb936bb, 0x4e88f7a5, 0xbc587d05, |
| 0x00000000, 0x3ff00000, 0xa8935dd9, 0x3f83dde2, 0x00000000, |
| 0x00000000, 0x00000000, 0x00000000 |
| }; |
| |
| private static int[] maskThirtyFiveTan = { |
| 0xfffc0000, 0xffffffff, 0x00000000, 0x00000000 |
| }; |
| |
| private static int[] qElevenTan = { |
| 0xb8fe4d77, 0x3f82609a |
| }; |
| |
| private static int[] qNineTan = { |
| 0xbf847a43, 0x3f9664a0 |
| }; |
| |
| private static int[] qSevenTan = { |
| 0x52c4c8ab, 0x3faba1ba |
| }; |
| |
| private static int[] qFiveTan = { |
| 0x11092746, 0x3fc11111 |
| }; |
| |
| private static int[] qThreeTan = { |
| 0x55555612, 0x3fd55555 |
| }; |
| |
| private static int[] piInvTableTan = { |
| 0x00000000, 0x00000000, 0xa2f9836e, 0x4e441529, 0xfc2757d1, |
| 0xf534ddc0, 0xdb629599, 0x3c439041, 0xfe5163ab, 0xdebbc561, |
| 0xb7246e3a, 0x424dd2e0, 0x06492eea, 0x09d1921c, 0xfe1deb1c, |
| 0xb129a73e, 0xe88235f5, 0x2ebb4484, 0xe99c7026, 0xb45f7e41, |
| 0x3991d639, 0x835339f4, 0x9c845f8b, 0xbdf9283b, 0x1ff897ff, |
| 0xde05980f, 0xef2f118b, 0x5a0a6d1f, 0x6d367ecf, 0x27cb09b7, |
| 0x4f463f66, 0x9e5fea2d, 0x7527bac7, 0xebe5f17b, 0x3d0739f7, |
| 0x8a5292ea, 0x6bfb5fb1, 0x1f8d5d08, 0x56033046, 0xfc7b6bab, |
| 0xf0cfbc21 |
| }; |
| |
| private static int[] piFourTan = { |
| 0x00000000, 0x3fe921fb, 0x4611a626, 0x3e85110b |
| }; |
| |
| private static int[] qqTwoTan = { |
| 0x676733af, 0x3d32e7b9 |
| }; |
| |
| private static int[] twoPowFiftyFiveTan = { |
| 0x00000000, 0x43600000 |
| }; |
| |
| private static int[] twoPowMFiftyFiveTan = { |
| 0x00000000, 0x3c800000 |
| }; |
| |
| public void tanIntrinsic(Register dest, Register value, CompilationResultBuilder crb, AMD64MacroAssembler masm) { |
| ArrayDataPointerConstant oneHalfTanPtr = new ArrayDataPointerConstant(oneHalfTan, 16); |
| ArrayDataPointerConstant mulSixteenPtr = new ArrayDataPointerConstant(mulSixteen, 16); |
| ArrayDataPointerConstant signMaskTanPtr = new ArrayDataPointerConstant(signMaskTan, 16); |
| ArrayDataPointerConstant piThirtyTwoInvTanPtr = new ArrayDataPointerConstant(piThirtyTwoInvTan, 16); |
| ArrayDataPointerConstant pOneTanPtr = new ArrayDataPointerConstant(pOneTan, 16); |
| ArrayDataPointerConstant pTwoTanPtr = new ArrayDataPointerConstant(pTwoTan, 16); |
| ArrayDataPointerConstant pThreeTanPtr = new ArrayDataPointerConstant(pThreeTan, 16); |
| ArrayDataPointerConstant cTableTanPtr = new ArrayDataPointerConstant(cTableTan, 16); |
| ArrayDataPointerConstant maskThirtyFiveTanPtr = new ArrayDataPointerConstant(maskThirtyFiveTan, 16); |
| ArrayDataPointerConstant qElevenTanPtr = new ArrayDataPointerConstant(qElevenTan, 16); |
| ArrayDataPointerConstant qNineTanPtr = new ArrayDataPointerConstant(qNineTan, 16); |
| ArrayDataPointerConstant qSevenTanPtr = new ArrayDataPointerConstant(qSevenTan, 8); |
| ArrayDataPointerConstant qFiveTanPtr = new ArrayDataPointerConstant(qFiveTan, 16); |
| ArrayDataPointerConstant qThreeTanPtr = new ArrayDataPointerConstant(qThreeTan, 16); |
| ArrayDataPointerConstant piInvTableTanPtr = new ArrayDataPointerConstant(piInvTableTan, 16); |
| ArrayDataPointerConstant piFourTanPtr = new ArrayDataPointerConstant(piFourTan, 8); |
| ArrayDataPointerConstant qqTwoTanPtr = new ArrayDataPointerConstant(qqTwoTan, 8); |
| ArrayDataPointerConstant onePtr = new ArrayDataPointerConstant(one, 8); |
| ArrayDataPointerConstant twoPowFiftyFiveTanPtr = new ArrayDataPointerConstant(twoPowFiftyFiveTan, 8); |
| ArrayDataPointerConstant twoPowMFiftyFiveTanPtr = new ArrayDataPointerConstant(twoPowMFiftyFiveTan, 8); |
| |
| Label bb0 = new Label(); |
| Label bb1 = new Label(); |
| Label bb2 = new Label(); |
| Label bb3 = new Label(); |
| Label bb5 = new Label(); |
| Label bb6 = new Label(); |
| Label bb8 = new Label(); |
| Label bb9 = new Label(); |
| Label bb10 = new Label(); |
| Label bb11 = new Label(); |
| Label bb12 = new Label(); |
| Label bb13 = new Label(); |
| Label bb14 = new Label(); |
| Label bb15 = new Label(); |
| |
| Register gpr1 = asRegister(gpr1Temp, AMD64Kind.QWORD); |
| Register gpr2 = asRegister(gpr2Temp, AMD64Kind.QWORD); |
| Register gpr3 = asRegister(rcxTemp, AMD64Kind.QWORD); |
| Register gpr4 = asRegister(gpr4Temp, AMD64Kind.QWORD); |
| Register gpr5 = asRegister(gpr5Temp, AMD64Kind.QWORD); |
| Register gpr6 = asRegister(gpr6Temp, AMD64Kind.QWORD); |
| Register gpr7 = asRegister(gpr7Temp, AMD64Kind.QWORD); |
| Register gpr8 = asRegister(gpr8Temp, AMD64Kind.QWORD); |
| Register gpr9 = asRegister(gpr9Temp, AMD64Kind.QWORD); |
| Register gpr10 = asRegister(gpr10Temp, AMD64Kind.QWORD); |
| |
| Register temp1 = asRegister(xmm1Temp, AMD64Kind.DOUBLE); |
| Register temp2 = asRegister(xmm2Temp, AMD64Kind.DOUBLE); |
| Register temp3 = asRegister(xmm3Temp, AMD64Kind.DOUBLE); |
| Register temp4 = asRegister(xmm4Temp, AMD64Kind.DOUBLE); |
| Register temp5 = asRegister(xmm5Temp, AMD64Kind.DOUBLE); |
| Register temp6 = asRegister(xmm6Temp, AMD64Kind.DOUBLE); |
| Register temp7 = asRegister(xmm7Temp, AMD64Kind.DOUBLE); |
| |
| setCrb(crb); |
| if (dest.encoding != value.encoding) { |
| masm.movdqu(dest, value); |
| } |
| |
| masm.pextrw(gpr1, dest, 3); |
| masm.andl(gpr1, 32767); |
| masm.subl(gpr1, 16314); |
| masm.cmpl(gpr1, 270); |
| masm.jcc(ConditionFlag.Above, bb0); |
| |
| masm.movdqu(temp5, externalAddress(oneHalfTanPtr)); // 0x00000000, |
| // 0x3fe00000, |
| // 0x00000000, |
| // 0x3fe00000 |
| masm.movdqu(temp6, externalAddress(mulSixteenPtr)); // 0x00000000, |
| // 0x40300000, |
| // 0x00000000, |
| // 0x3ff00000 |
| masm.unpcklpd(dest, dest); |
| masm.movdqu(temp4, externalAddress(signMaskTanPtr)); // 0x00000000, |
| // 0x80000000, |
| // 0x00000000, |
| // 0x80000000 |
| masm.andpd(temp4, dest); |
| masm.movdqu(temp1, externalAddress(piThirtyTwoInvTanPtr)); // 0x6dc9c883, |
| // 0x3fe45f30, |
| // 0x6dc9c883, |
| // 0x40245f30 |
| masm.mulpd(temp1, dest); |
| masm.por(temp5, temp4); |
| masm.addpd(temp1, temp5); |
| masm.movdqu(temp7, temp1); |
| masm.unpckhpd(temp7, temp7); |
| masm.cvttsd2sil(gpr4, temp7); |
| masm.cvttpd2dq(temp1, temp1); |
| masm.cvtdq2pd(temp1, temp1); |
| masm.mulpd(temp1, temp6); |
| masm.movdqu(temp3, externalAddress(pOneTanPtr)); // 0x54444000, |
| // 0x3fb921fb, |
| // 0x54440000, |
| // 0x3fb921fb |
| masm.movdq(temp5, externalAddress(qqTwoTanPtr)); // 0x676733af, |
| // 0x3d32e7b9 |
| masm.addq(gpr4, 469248); |
| masm.movdqu(temp4, externalAddress(pTwoTanPtr)); // 0x67674000, |
| // 0xbd32e7b9, |
| // 0x4c4c0000, |
| // 0x3d468c23 |
| masm.mulpd(temp3, temp1); |
| masm.andq(gpr4, 31); |
| masm.mulsd(temp5, temp1); |
| masm.movq(gpr3, gpr4); |
| masm.mulpd(temp4, temp1); |
| masm.shlq(gpr3, 1); |
| masm.subpd(dest, temp3); |
| masm.mulpd(temp1, externalAddress(pThreeTanPtr)); // 0x3707344a, |
| // 0x3aa8a2e0, |
| // 0x03707345, |
| // 0x3ae98a2e |
| masm.addq(gpr4, gpr3); |
| masm.shlq(gpr3, 2); |
| masm.addq(gpr4, gpr3); |
| masm.addsd(temp5, dest); |
| masm.movdqu(temp2, dest); |
| masm.subpd(dest, temp4); |
| masm.movdq(temp6, externalAddress(onePtr)); // 0x00000000, |
| // 0x3ff00000 |
| masm.shlq(gpr4, 4); |
| masm.leaq(gpr1, externalAddress(cTableTanPtr)); |
| masm.andpd(temp5, externalAddress(maskThirtyFiveTanPtr)); // 0xfffc0000, |
| // 0xffffffff, |
| // 0x00000000, |
| // 0x00000000 |
| masm.movdqu(temp3, dest); |
| masm.addq(gpr1, gpr4); |
| masm.subpd(temp2, dest); |
| masm.unpckhpd(dest, dest); |
| masm.divsd(temp6, temp5); |
| masm.subpd(temp2, temp4); |
| masm.movdqu(temp7, new AMD64Address(gpr1, 16)); |
| masm.subsd(temp3, temp5); |
| masm.mulpd(temp7, dest); |
| masm.subpd(temp2, temp1); |
| masm.movdqu(temp1, new AMD64Address(gpr1, 48)); |
| masm.mulpd(temp1, dest); |
| masm.movdqu(temp4, new AMD64Address(gpr1, 96)); |
| masm.mulpd(temp4, dest); |
| masm.addsd(temp2, temp3); |
| masm.movdqu(temp3, dest); |
| masm.mulpd(dest, dest); |
| masm.addpd(temp7, new AMD64Address(gpr1, 0)); |
| masm.addpd(temp1, new AMD64Address(gpr1, 32)); |
| masm.mulpd(temp1, dest); |
| masm.addpd(temp4, new AMD64Address(gpr1, 80)); |
| masm.addpd(temp7, temp1); |
| masm.movdqu(temp1, new AMD64Address(gpr1, 112)); |
| masm.mulpd(temp1, dest); |
| masm.mulpd(dest, dest); |
| masm.addpd(temp4, temp1); |
| masm.movdqu(temp1, new AMD64Address(gpr1, 64)); |
| masm.mulpd(temp1, dest); |
| masm.addpd(temp7, temp1); |
| masm.movdqu(temp1, temp3); |
| masm.mulpd(temp3, dest); |
| masm.mulsd(dest, dest); |
| masm.mulpd(temp1, new AMD64Address(gpr1, 144)); |
| masm.mulpd(temp4, temp3); |
| masm.movdqu(temp3, temp1); |
| masm.addpd(temp7, temp4); |
| masm.movdqu(temp4, temp1); |
| masm.mulsd(dest, temp7); |
| masm.unpckhpd(temp7, temp7); |
| masm.addsd(dest, temp7); |
| masm.unpckhpd(temp1, temp1); |
| masm.addsd(temp3, temp1); |
| masm.subsd(temp4, temp3); |
| masm.addsd(temp1, temp4); |
| masm.movdqu(temp4, temp2); |
| masm.movdq(temp7, new AMD64Address(gpr1, 144)); |
| masm.unpckhpd(temp2, temp2); |
| masm.addsd(temp7, new AMD64Address(gpr1, 152)); |
| masm.mulsd(temp7, temp2); |
| masm.addsd(temp7, new AMD64Address(gpr1, 136)); |
| masm.addsd(temp7, temp1); |
| masm.addsd(dest, temp7); |
| masm.movdq(temp7, externalAddress(onePtr)); // 0x00000000, |
| // 0x3ff00000 |
| masm.mulsd(temp4, temp6); |
| masm.movdq(temp2, new AMD64Address(gpr1, 168)); |
| masm.andpd(temp2, temp6); |
| masm.mulsd(temp5, temp2); |
| masm.mulsd(temp6, new AMD64Address(gpr1, 160)); |
| masm.subsd(temp7, temp5); |
| masm.subsd(temp2, new AMD64Address(gpr1, 128)); |
| masm.subsd(temp7, temp4); |
| masm.mulsd(temp7, temp6); |
| masm.movdqu(temp4, temp3); |
| masm.subsd(temp3, temp2); |
| masm.addsd(temp2, temp3); |
| masm.subsd(temp4, temp2); |
| masm.addsd(dest, temp4); |
| masm.subsd(dest, temp7); |
| masm.addsd(dest, temp3); |
| masm.jmp(bb15); |
| |
| masm.bind(bb0); |
| masm.jcc(ConditionFlag.Greater, bb1); |
| |
| masm.pextrw(gpr1, dest, 3); |
| masm.movl(gpr4, gpr1); |
| masm.andl(gpr1, 32752); |
| masm.jcc(ConditionFlag.Equal, bb2); |
| |
| masm.andl(gpr4, 32767); |
| masm.cmpl(gpr4, 15904); |
| masm.jcc(ConditionFlag.Below, bb3); |
| |
| masm.movdqu(temp2, dest); |
| masm.movdqu(temp3, dest); |
| masm.movdq(temp1, externalAddress(qElevenTanPtr)); // 0xb8fe4d77, |
| // 0x3f82609a |
| masm.mulsd(temp2, dest); |
| masm.mulsd(temp3, temp2); |
| masm.mulsd(temp1, temp2); |
| masm.addsd(temp1, externalAddress(qNineTanPtr)); // 0xbf847a43, |
| // 0x3f9664a0 |
| masm.mulsd(temp1, temp2); |
| masm.addsd(temp1, externalAddress(qSevenTanPtr)); // 0x52c4c8ab, |
| // 0x3faba1ba |
| masm.mulsd(temp1, temp2); |
| masm.addsd(temp1, externalAddress(qFiveTanPtr)); // 0x11092746, |
| // 0x3fc11111 |
| masm.mulsd(temp1, temp2); |
| masm.addsd(temp1, externalAddress(qThreeTanPtr)); // 0x55555612, |
| // 0x3fd55555 |
| masm.mulsd(temp1, temp3); |
| masm.addsd(dest, temp1); |
| masm.jmp(bb15); |
| |
| masm.bind(bb3); |
| masm.movdq(temp3, externalAddress(twoPowFiftyFiveTanPtr)); // 0x00000000, |
| // 0x43600000 |
| masm.mulsd(temp3, dest); |
| masm.addsd(dest, temp3); |
| masm.mulsd(dest, externalAddress(twoPowMFiftyFiveTanPtr)); // 0x00000000, |
| // 0x3c800000 |
| masm.jmp(bb15); |
| |
| masm.bind(bb14); |
| masm.xorpd(temp1, temp1); |
| masm.xorpd(dest, dest); |
| masm.divsd(dest, temp1); |
| masm.jmp(bb15); |
| |
| masm.bind(bb2); |
| masm.movdqu(temp1, dest); |
| masm.mulsd(temp1, temp1); |
| masm.jmp(bb15); |
| |
| masm.bind(bb1); |
| masm.pextrw(gpr3, dest, 3); |
| masm.andl(gpr3, 32752); |
| masm.cmpl(gpr3, 32752); |
| masm.jcc(ConditionFlag.Equal, bb14); |
| |
| masm.subl(gpr3, 16224); |
| masm.shrl(gpr3, 7); |
| masm.andl(gpr3, 65532); |
| masm.leaq(gpr10, externalAddress(piInvTableTanPtr)); |
| masm.addq(gpr3, gpr10); |
| masm.movdq(gpr1, dest); |
| masm.movl(gpr9, new AMD64Address(gpr3, 20)); |
| masm.movl(gpr7, new AMD64Address(gpr3, 24)); |
| masm.movl(gpr4, gpr1); |
| masm.shrq(gpr1, 21); |
| masm.orl(gpr1, Integer.MIN_VALUE); |
| masm.shrl(gpr1, 11); |
| masm.movl(gpr8, gpr9); |
| masm.imulq(gpr9, gpr4); |
| masm.imulq(gpr8, gpr1); |
| masm.imulq(gpr7, gpr1); |
| masm.movl(gpr5, new AMD64Address(gpr3, 16)); |
| masm.movl(gpr6, new AMD64Address(gpr3, 12)); |
| masm.movl(gpr10, gpr9); |
| masm.shrq(gpr9, 32); |
| masm.addq(gpr8, gpr9); |
| masm.addq(gpr10, gpr7); |
| masm.movl(gpr7, gpr10); |
| masm.shrq(gpr10, 32); |
| masm.addq(gpr8, gpr10); |
| masm.movl(gpr9, gpr5); |
| masm.imulq(gpr5, gpr4); |
| masm.imulq(gpr9, gpr1); |
| masm.movl(gpr10, gpr6); |
| masm.imulq(gpr6, gpr4); |
| masm.movl(gpr2, gpr5); |
| masm.shrq(gpr5, 32); |
| masm.addq(gpr8, gpr2); |
| masm.movl(gpr2, gpr8); |
| masm.shrq(gpr8, 32); |
| masm.addq(gpr9, gpr5); |
| masm.addq(gpr9, gpr8); |
| masm.shlq(gpr2, 32); |
| masm.orq(gpr7, gpr2); |
| masm.imulq(gpr10, gpr1); |
| masm.movl(gpr8, new AMD64Address(gpr3, 8)); |
| masm.movl(gpr5, new AMD64Address(gpr3, 4)); |
| masm.movl(gpr2, gpr6); |
| masm.shrq(gpr6, 32); |
| masm.addq(gpr9, gpr2); |
| masm.movl(gpr2, gpr9); |
| masm.shrq(gpr9, 32); |
| masm.addq(gpr10, gpr6); |
| masm.addq(gpr10, gpr9); |
| masm.movq(gpr6, gpr8); |
| masm.imulq(gpr8, gpr4); |
| masm.imulq(gpr6, gpr1); |
| masm.movl(gpr9, gpr8); |
| masm.shrq(gpr8, 32); |
| masm.addq(gpr10, gpr9); |
| masm.movl(gpr9, gpr10); |
| masm.shrq(gpr10, 32); |
| masm.addq(gpr6, gpr8); |
| masm.addq(gpr6, gpr10); |
| masm.movq(gpr8, gpr5); |
| masm.imulq(gpr5, gpr4); |
| masm.imulq(gpr8, gpr1); |
| masm.shlq(gpr9, 32); |
| masm.orq(gpr9, gpr2); |
| masm.movl(gpr1, new AMD64Address(gpr3, 0)); |
| masm.movl(gpr10, gpr5); |
| masm.shrq(gpr5, 32); |
| masm.addq(gpr6, gpr10); |
| masm.movl(gpr10, gpr6); |
| masm.shrq(gpr6, 32); |
| masm.addq(gpr8, gpr5); |
| masm.addq(gpr8, gpr6); |
| masm.imulq(gpr4, gpr1); |
| masm.pextrw(gpr2, dest, 3); |
| masm.leaq(gpr6, externalAddress(piInvTableTanPtr)); |
| masm.subq(gpr3, gpr6); |
| masm.addl(gpr3, gpr3); |
| masm.addl(gpr3, gpr3); |
| masm.addl(gpr3, gpr3); |
| masm.addl(gpr3, 19); |
| masm.movl(gpr5, 32768); |
| masm.andl(gpr5, gpr2); |
| masm.shrl(gpr2, 4); |
| masm.andl(gpr2, 2047); |
| masm.subl(gpr2, 1023); |
| masm.subl(gpr3, gpr2); |
| masm.addq(gpr8, gpr4); |
| masm.movl(gpr4, gpr3); |
| masm.addl(gpr4, 32); |
| masm.cmpl(gpr3, 0); |
| masm.jcc(ConditionFlag.Less, bb5); |
| |
| masm.negl(gpr3); |
| masm.addl(gpr3, 29); |
| masm.shll(gpr8); |
| masm.movl(gpr6, gpr8); |
| masm.andl(gpr8, 1073741823); |
| masm.testl(gpr8, 536870912); |
| masm.jcc(ConditionFlag.NotEqual, bb6); |
| |
| masm.shrl(gpr8); |
| masm.movl(gpr2, 0); |
| masm.shlq(gpr8, 32); |
| masm.orq(gpr8, gpr10); |
| |
| masm.bind(bb8); |
| masm.cmpq(gpr8, 0); |
| masm.jcc(ConditionFlag.Equal, bb9); |
| |
| masm.bind(bb10); |
| masm.bsrq(gpr10, gpr8); |
| masm.movl(gpr3, 29); |
| masm.subl(gpr3, gpr10); |
| masm.jcc(ConditionFlag.LessEqual, bb11); |
| |
| masm.shlq(gpr8); |
| masm.movq(gpr1, gpr9); |
| masm.shlq(gpr9); |
| masm.addl(gpr4, gpr3); |
| masm.negl(gpr3); |
| masm.addl(gpr3, 64); |
| masm.shrq(gpr1); |
| masm.shrq(gpr7); |
| masm.orq(gpr8, gpr1); |
| masm.orq(gpr9, gpr7); |
| |
| masm.bind(bb12); |
| masm.cvtsi2sdq(dest, gpr8); |
| masm.shrq(gpr9, 1); |
| masm.cvtsi2sdq(temp3, gpr9); |
| masm.xorpd(temp4, temp4); |
| masm.shll(gpr4, 4); |
| masm.negl(gpr4); |
| masm.addl(gpr4, 16368); |
| masm.orl(gpr4, gpr5); |
| masm.xorl(gpr4, gpr2); |
| masm.pinsrw(temp4, gpr4, 3); |
| masm.leaq(gpr1, externalAddress(piFourTanPtr)); |
| masm.movdq(temp2, new AMD64Address(gpr1, 0)); // 0x00000000, |
| // 0x3fe921fb, |
| masm.movdq(temp7, new AMD64Address(gpr1, 8)); // 0x4611a626, |
| // 0x3e85110b |
| masm.xorpd(temp5, temp5); |
| masm.subl(gpr4, 1008); |
| masm.pinsrw(temp5, gpr4, 3); |
| masm.mulsd(dest, temp4); |
| masm.shll(gpr5, 16); |
| masm.sarl(gpr5, 31); |
| masm.mulsd(temp3, temp5); |
| masm.movdqu(temp1, dest); |
| masm.mulsd(dest, temp2); |
| masm.shrl(gpr6, 30); |
| masm.addsd(temp1, temp3); |
| masm.mulsd(temp3, temp2); |
| masm.addl(gpr6, gpr5); |
| masm.xorl(gpr6, gpr5); |
| masm.mulsd(temp7, temp1); |
| masm.movl(gpr1, gpr6); |
| masm.addsd(temp7, temp3); |
| masm.movdqu(temp2, dest); |
| masm.addsd(dest, temp7); |
| masm.subsd(temp2, dest); |
| masm.addsd(temp7, temp2); |
| masm.movdqu(temp1, externalAddress(piThirtyTwoInvTanPtr)); // 0x6dc9c883, |
| // 0x3fe45f30, |
| // 0x6dc9c883, |
| // 0x40245f30 |
| if (masm.supports(CPUFeature.SSE3)) { |
| masm.movddup(dest, dest); |
| } else { |
| masm.movlhps(dest, dest); |
| } |
| masm.movdqu(temp4, externalAddress(signMaskTanPtr)); // 0x00000000, |
| // 0x80000000, |
| // 0x00000000, |
| // 0x80000000 |
| masm.andpd(temp4, dest); |
| masm.mulpd(temp1, dest); |
| if (masm.supports(CPUFeature.SSE3)) { |
| masm.movddup(temp7, temp7); |
| } else { |
| masm.movlhps(temp7, temp7); |
| } |
| masm.movdqu(temp5, externalAddress(oneHalfTanPtr)); // 0x00000000, |
| // 0x3fe00000, |
| // 0x00000000, |
| // 0x3fe00000 |
| masm.movdqu(temp6, externalAddress(mulSixteenPtr)); // 0x00000000, |
| // 0x40300000, |
| // 0x00000000, |
| // 0x3ff00000 |
| masm.por(temp5, temp4); |
| masm.addpd(temp1, temp5); |
| masm.movdqu(temp5, temp1); |
| masm.unpckhpd(temp5, temp5); |
| masm.cvttsd2sil(gpr4, temp5); |
| masm.cvttpd2dq(temp1, temp1); |
| masm.cvtdq2pd(temp1, temp1); |
| masm.mulpd(temp1, temp6); |
| masm.movdqu(temp3, externalAddress(pOneTanPtr)); // 0x54444000, |
| // 0x3fb921fb, |
| // 0x54440000, |
| // 0x3fb921fb |
| masm.movdq(temp5, externalAddress(qqTwoTanPtr)); // 0x676733af, |
| // 0x3d32e7b9 |
| masm.shll(gpr1, 4); |
| masm.addl(gpr4, 469248); |
| masm.movdqu(temp4, externalAddress(pTwoTanPtr)); // 0x67674000, |
| // 0xbd32e7b9, |
| // 0x4c4c0000, |
| // 0x3d468c23 |
| masm.mulpd(temp3, temp1); |
| masm.addl(gpr4, gpr1); |
| masm.andl(gpr4, 31); |
| masm.mulsd(temp5, temp1); |
| masm.movl(gpr3, gpr4); |
| masm.mulpd(temp4, temp1); |
| masm.shll(gpr3, 1); |
| masm.subpd(dest, temp3); |
| masm.mulpd(temp1, externalAddress(pThreeTanPtr)); // 0x3707344a, |
| // 0x3aa8a2e0, |
| // 0x03707345, |
| // 0x3ae98a2e |
| masm.addl(gpr4, gpr3); |
| masm.shll(gpr3, 2); |
| masm.addl(gpr4, gpr3); |
| masm.addsd(temp5, dest); |
| masm.movdqu(temp2, dest); |
| masm.subpd(dest, temp4); |
| masm.movdq(temp6, externalAddress(onePtr)); // 0x00000000, |
| // 0x3ff00000 |
| masm.shll(gpr4, 4); |
| masm.leaq(gpr1, externalAddress(cTableTanPtr)); |
| masm.andpd(temp5, externalAddress(maskThirtyFiveTanPtr)); // 0xfffc0000, |
| // 0xffffffff, |
| // 0x00000000, |
| // 0x00000000 |
| masm.movdqu(temp3, dest); |
| masm.addq(gpr1, gpr4); |
| masm.subpd(temp2, dest); |
| masm.unpckhpd(dest, dest); |
| masm.divsd(temp6, temp5); |
| masm.subpd(temp2, temp4); |
| masm.subsd(temp3, temp5); |
| masm.subpd(temp2, temp1); |
| masm.movdqu(temp1, new AMD64Address(gpr1, 48)); |
| masm.addpd(temp2, temp7); |
| masm.movdqu(temp7, new AMD64Address(gpr1, 16)); |
| masm.mulpd(temp7, dest); |
| masm.movdqu(temp4, new AMD64Address(gpr1, 96)); |
| masm.mulpd(temp1, dest); |
| masm.mulpd(temp4, dest); |
| masm.addsd(temp2, temp3); |
| masm.movdqu(temp3, dest); |
| masm.mulpd(dest, dest); |
| masm.addpd(temp7, new AMD64Address(gpr1, 0)); |
| masm.addpd(temp1, new AMD64Address(gpr1, 32)); |
| masm.mulpd(temp1, dest); |
| masm.addpd(temp4, new AMD64Address(gpr1, 80)); |
| masm.addpd(temp7, temp1); |
| masm.movdqu(temp1, new AMD64Address(gpr1, 112)); |
| masm.mulpd(temp1, dest); |
| masm.mulpd(dest, dest); |
| masm.addpd(temp4, temp1); |
| masm.movdqu(temp1, new AMD64Address(gpr1, 64)); |
| masm.mulpd(temp1, dest); |
| masm.addpd(temp7, temp1); |
| masm.movdqu(temp1, temp3); |
| masm.mulpd(temp3, dest); |
| masm.mulsd(dest, dest); |
| masm.mulpd(temp1, new AMD64Address(gpr1, 144)); |
| masm.mulpd(temp4, temp3); |
| masm.movdqu(temp3, temp1); |
| masm.addpd(temp7, temp4); |
| masm.movdqu(temp4, temp1); |
| masm.mulsd(dest, temp7); |
| masm.unpckhpd(temp7, temp7); |
| masm.addsd(dest, temp7); |
| masm.unpckhpd(temp1, temp1); |
| masm.addsd(temp3, temp1); |
| masm.subsd(temp4, temp3); |
| masm.addsd(temp1, temp4); |
| masm.movdqu(temp4, temp2); |
| masm.movdq(temp7, new AMD64Address(gpr1, 144)); |
| masm.unpckhpd(temp2, temp2); |
| masm.addsd(temp7, new AMD64Address(gpr1, 152)); |
| masm.mulsd(temp7, temp2); |
| masm.addsd(temp7, new AMD64Address(gpr1, 136)); |
| masm.addsd(temp7, temp1); |
| masm.addsd(dest, temp7); |
| masm.movdq(temp7, externalAddress(onePtr)); // 0x00000000, |
| // 0x3ff00000 |
| masm.mulsd(temp4, temp6); |
| masm.movdq(temp2, new AMD64Address(gpr1, 168)); |
| masm.andpd(temp2, temp6); |
| masm.mulsd(temp5, temp2); |
| masm.mulsd(temp6, new AMD64Address(gpr1, 160)); |
| masm.subsd(temp7, temp5); |
| masm.subsd(temp2, new AMD64Address(gpr1, 128)); |
| masm.subsd(temp7, temp4); |
| masm.mulsd(temp7, temp6); |
| masm.movdqu(temp4, temp3); |
| masm.subsd(temp3, temp2); |
| masm.addsd(temp2, temp3); |
| masm.subsd(temp4, temp2); |
| masm.addsd(dest, temp4); |
| masm.subsd(dest, temp7); |
| masm.addsd(dest, temp3); |
| masm.jmp(bb15); |
| |
| masm.bind(bb9); |
| masm.addl(gpr4, 64); |
| masm.movq(gpr8, gpr9); |
| masm.movq(gpr9, gpr7); |
| masm.movl(gpr7, 0); |
| masm.cmpq(gpr8, 0); |
| masm.jcc(ConditionFlag.NotEqual, bb10); |
| |
| masm.addl(gpr4, 64); |
| masm.movq(gpr8, gpr9); |
| masm.movq(gpr9, gpr7); |
| masm.cmpq(gpr8, 0); |
| masm.jcc(ConditionFlag.NotEqual, bb10); |
| |
| masm.jmp(bb12); |
| |
| masm.bind(bb11); |
| masm.jcc(ConditionFlag.Equal, bb12); |
| |
| masm.negl(gpr3); |
| masm.shrq(gpr9); |
| masm.movq(gpr1, gpr8); |
| masm.shrq(gpr8); |
| masm.subl(gpr4, gpr3); |
| masm.negl(gpr3); |
| masm.addl(gpr3, 64); |
| masm.shlq(gpr1); |
| masm.orq(gpr9, gpr1); |
| masm.jmp(bb12); |
| |
| masm.bind(bb5); |
| masm.notl(gpr3); |
| masm.shlq(gpr8, 32); |
| masm.orq(gpr8, gpr10); |
| masm.shlq(gpr8); |
| masm.movq(gpr6, gpr8); |
| masm.testl(gpr8, Integer.MIN_VALUE); |
| masm.jcc(ConditionFlag.NotEqual, bb13); |
| |
| masm.shrl(gpr8); |
| masm.movl(gpr2, 0); |
| masm.shrq(gpr6, 2); |
| masm.jmp(bb8); |
| |
| masm.bind(bb6); |
| masm.shrl(gpr8); |
| masm.movl(gpr2, 1073741824); |
| masm.shrl(gpr2); |
| masm.shlq(gpr8, 32); |
| masm.orq(gpr8, gpr10); |
| masm.shlq(gpr2, 32); |
| masm.addl(gpr6, 1073741824); |
| masm.movl(gpr3, 0); |
| masm.movl(gpr10, 0); |
| masm.subq(gpr3, gpr7); |
| masm.sbbq(gpr10, gpr9); |
| masm.sbbq(gpr2, gpr8); |
| masm.movq(gpr7, gpr3); |
| masm.movq(gpr9, gpr10); |
| masm.movq(gpr8, gpr2); |
| masm.movl(gpr2, 32768); |
| masm.jmp(bb8); |
| |
| masm.bind(bb13); |
| masm.shrl(gpr8); |
| masm.movq(gpr2, 0x100000000L); |
| masm.shrq(gpr2); |
| masm.movl(gpr3, 0); |
| masm.movl(gpr10, 0); |
| masm.subq(gpr3, gpr7); |
| masm.sbbq(gpr10, gpr9); |
| masm.sbbq(gpr2, gpr8); |
| masm.movq(gpr7, gpr3); |
| masm.movq(gpr9, gpr10); |
| masm.movq(gpr8, gpr2); |
| masm.movl(gpr2, 32768); |
| masm.shrq(gpr6, 2); |
| masm.addl(gpr6, 1073741824); |
| masm.jmp(bb8); |
| |
| masm.bind(bb15); |
| } |
| |
| /* |
| * Copyright (c) 2014, 2016, Intel Corporation. All rights reserved. Intel Math Library (LIBM) |
| * Source Code |
| * |
| * ALGORITHM DESCRIPTION - EXP() --------------------- |
| * |
| * Description: Let K = 64 (table size). x x/log(2) n e = 2 = 2 * T[j] * (1 + P(y)) where x = |
| * m*log(2)/K + y, y in [-log(2)/K..log(2)/K] m = n*K + j, m,n,j - signed integer, j in |
| * [-K/2..K/2] j/K values of 2 are tabulated as T[j] = T_hi[j] ( 1 + T_lo[j]). |
| * |
| * P(y) is a minimax polynomial approximation of exp(x)-1 on small interval |
| * [-log(2)/K..log(2)/K] (were calculated by Maple V). |
| * |
| * To avoid problems with arithmetic overflow and underflow, n n1 n2 value of 2 is safely |
| * computed as 2 * 2 where n1 in [-BIAS/2..BIAS/2] where BIAS is a value of exponent bias. |
| * |
| * Special cases: exp(NaN) = NaN exp(+INF) = +INF exp(-INF) = 0 exp(x) = 1 for subnormals for |
| * finite argument, only exp(0)=1 is exact For IEEE double if x > 709.782712893383973096 then |
| * exp(x) overflow if x < -745.133219101941108420 then exp(x) underflow |
| * |
| */ |
| |
| private static int[] cvExp = { |
| 0x652b82fe, 0x40571547, 0x652b82fe, 0x40571547, 0xfefa0000, |
| 0x3f862e42, 0xfefa0000, 0x3f862e42, 0xbc9e3b3a, 0x3d1cf79a, |
| 0xbc9e3b3a, 0x3d1cf79a, 0xfffffffe, 0x3fdfffff, 0xfffffffe, |
| 0x3fdfffff, 0xe3289860, 0x3f56c15c, 0x555b9e25, 0x3fa55555, |
| 0xc090cf0f, 0x3f811115, 0x55548ba1, 0x3fc55555 |
| }; |
| |
| private static int[] shifterExp = { |
| 0x00000000, 0x43380000, 0x00000000, 0x43380000 |
| }; |
| |
| private static int[] mMaskExp = { |
| 0xffffffc0, 0x00000000, 0xffffffc0, 0x00000000 |
| }; |
| |
| private static int[] biasExp = { |
| 0x0000ffc0, 0x00000000, 0x0000ffc0, 0x00000000 |
| }; |
| |
| private static int[] tblAddrExp = { |
| 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0e03754d, |
| 0x3cad7bbf, 0x3e778060, 0x00002c9a, 0x3567f613, 0x3c8cd252, |
| 0xd3158574, 0x000059b0, 0x61e6c861, 0x3c60f74e, 0x18759bc8, |
| 0x00008745, 0x5d837b6c, 0x3c979aa6, 0x6cf9890f, 0x0000b558, |
| 0x702f9cd1, 0x3c3ebe3d, 0x32d3d1a2, 0x0000e3ec, 0x1e63bcd8, |
| 0x3ca3516e, 0xd0125b50, 0x00011301, 0x26f0387b, 0x3ca4c554, |
| 0xaea92ddf, 0x0001429a, 0x62523fb6, 0x3ca95153, 0x3c7d517a, |
| 0x000172b8, 0x3f1353bf, 0x3c8b898c, 0xeb6fcb75, 0x0001a35b, |
| 0x3e3a2f5f, 0x3c9aecf7, 0x3168b9aa, 0x0001d487, 0x44a6c38d, |
| 0x3c8a6f41, 0x88628cd6, 0x0002063b, 0xe3a8a894, 0x3c968efd, |
| 0x6e756238, 0x0002387a, 0x981fe7f2, 0x3c80472b, 0x65e27cdd, |
| 0x00026b45, 0x6d09ab31, 0x3c82f7e1, 0xf51fdee1, 0x00029e9d, |
| 0x720c0ab3, 0x3c8b3782, 0xa6e4030b, 0x0002d285, 0x4db0abb6, |
| 0x3c834d75, 0x0a31b715, 0x000306fe, 0x5dd3f84a, 0x3c8fdd39, |
| 0xb26416ff, 0x00033c08, 0xcc187d29, 0x3ca12f8c, 0x373aa9ca, |
| 0x000371a7, 0x738b5e8b, 0x3ca7d229, 0x34e59ff6, 0x0003a7db, |
| 0xa72a4c6d, 0x3c859f48, 0x4c123422, 0x0003dea6, 0x259d9205, |
| 0x3ca8b846, 0x21f72e29, 0x0004160a, 0x60c2ac12, 0x3c4363ed, |
| 0x6061892d, 0x00044e08, 0xdaa10379, 0x3c6ecce1, 0xb5c13cd0, |
| 0x000486a2, 0xbb7aafb0, 0x3c7690ce, 0xd5362a27, 0x0004bfda, |
| 0x9b282a09, 0x3ca083cc, 0x769d2ca6, 0x0004f9b2, 0xc1aae707, |
| 0x3ca509b0, 0x569d4f81, 0x0005342b, 0x18fdd78e, 0x3c933505, |
| 0x36b527da, 0x00056f47, 0xe21c5409, 0x3c9063e1, 0xdd485429, |
| 0x0005ab07, 0x2b64c035, 0x3c9432e6, 0x15ad2148, 0x0005e76f, |
| 0x99f08c0a, 0x3ca01284, 0xb03a5584, 0x0006247e, 0x0073dc06, |
| 0x3c99f087, 0x82552224, 0x00066238, 0x0da05571, 0x3c998d4d, |
| 0x667f3bcc, 0x0006a09e, 0x86ce4786, 0x3ca52bb9, 0x3c651a2e, |
| 0x0006dfb2, 0x206f0dab, 0x3ca32092, 0xe8ec5f73, 0x00071f75, |
| 0x8e17a7a6, 0x3ca06122, 0x564267c8, 0x00075feb, 0x461e9f86, |
| 0x3ca244ac, 0x73eb0186, 0x0007a114, 0xabd66c55, 0x3c65ebe1, |
| 0x36cf4e62, 0x0007e2f3, 0xbbff67d0, 0x3c96fe9f, 0x994cce12, |
| 0x00082589, 0x14c801df, 0x3c951f14, 0x9b4492ec, 0x000868d9, |
| 0xc1f0eab4, 0x3c8db72f, 0x422aa0db, 0x0008ace5, 0x59f35f44, |
| 0x3c7bf683, 0x99157736, 0x0008f1ae, 0x9c06283c, 0x3ca360ba, |
| 0xb0cdc5e4, 0x00093737, 0x20f962aa, 0x3c95e8d1, 0x9fde4e4f, |
| 0x00097d82, 0x2b91ce27, 0x3c71affc, 0x82a3f090, 0x0009c491, |
| 0x589a2ebd, 0x3c9b6d34, 0x7b5de564, 0x000a0c66, 0x9ab89880, |
| 0x3c95277c, 0xb23e255c, 0x000a5503, 0x6e735ab3, 0x3c846984, |
| 0x5579fdbf, 0x000a9e6b, 0x92cb3387, 0x3c8c1a77, 0x995ad3ad, |
| 0x000ae89f, 0xdc2d1d96, 0x3ca22466, 0xb84f15fa, 0x000b33a2, |
| 0xb19505ae, 0x3ca1112e, 0xf2fb5e46, 0x000b7f76, 0x0a5fddcd, |
| 0x3c74ffd7, 0x904bc1d2, 0x000bcc1e, 0x30af0cb3, 0x3c736eae, |
| 0xdd85529c, 0x000c199b, 0xd10959ac, 0x3c84e08f, 0x2e57d14b, |
| 0x000c67f1, 0x6c921968, 0x3c676b2c, 0xdcef9069, 0x000cb720, |
| 0x36df99b3, 0x3c937009, 0x4a07897b, 0x000d072d, 0xa63d07a7, |
| 0x3c74a385, 0xdcfba487, 0x000d5818, 0xd5c192ac, 0x3c8e5a50, |
| 0x03db3285, 0x000da9e6, 0x1c4a9792, 0x3c98bb73, 0x337b9b5e, |
| 0x000dfc97, 0x603a88d3, 0x3c74b604, 0xe78b3ff6, 0x000e502e, |
| 0x92094926, 0x3c916f27, 0xa2a490d9, 0x000ea4af, 0x41aa2008, |
| 0x3c8ec3bc, 0xee615a27, 0x000efa1b, 0x31d185ee, 0x3c8a64a9, |
| 0x5b6e4540, 0x000f5076, 0x4d91cd9d, 0x3c77893b, 0x819e90d8, |
| 0x000fa7c1 |
| }; |
| |
| private static int[] allOnesExp = { |
| 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff |
| }; |
| |
| private static int[] expBias = { |
| 0x00000000, 0x3ff00000, 0x00000000, 0x3ff00000 |
| }; |
| |
| private static int[] xMaxExp = { |
| 0xffffffff, 0x7fefffff |
| }; |
| |
| private static int[] xMinExp = { |
| 0x00000000, 0x00100000 |
| }; |
| |
| private static int[] infExp = { |
| 0x00000000, 0x7ff00000 |
| }; |
| |
| private static int[] zeroExp = { |
| 0x00000000, 0x00000000 |
| }; |
| |
| public void expIntrinsic(Register dest, Register value, CompilationResultBuilder crb, AMD64MacroAssembler masm) { |
| ArrayDataPointerConstant onePtr = new ArrayDataPointerConstant(one, 16); |
| ArrayDataPointerConstant cvExpPtr = new ArrayDataPointerConstant(cvExp, 16); |
| ArrayDataPointerConstant shifterExpPtr = new ArrayDataPointerConstant(shifterExp, 8); |
| ArrayDataPointerConstant mMaskExpPtr = new ArrayDataPointerConstant(mMaskExp, 16); |
| ArrayDataPointerConstant biasExpPtr = new ArrayDataPointerConstant(biasExp, 16); |
| ArrayDataPointerConstant tblAddrExpPtr = new ArrayDataPointerConstant(tblAddrExp, 16); |
| ArrayDataPointerConstant expBiasPtr = new ArrayDataPointerConstant(expBias, 8); |
| ArrayDataPointerConstant xMaxExpPtr = new ArrayDataPointerConstant(xMaxExp, 8); |
| ArrayDataPointerConstant xMinExpPtr = new ArrayDataPointerConstant(xMinExp, 8); |
| ArrayDataPointerConstant infExpPtr = new ArrayDataPointerConstant(infExp, 8); |
| ArrayDataPointerConstant zeroExpPtr = new ArrayDataPointerConstant(zeroExp, 8); |
| ArrayDataPointerConstant allOnesExpPtr = new ArrayDataPointerConstant(allOnesExp, 8); |
| |
| Label bb0 = new Label(); |
| Label bb1 = new Label(); |
| Label bb2 = new Label(); |
| Label bb3 = new Label(); |
| Label bb4 = new Label(); |
| Label bb5 = new Label(); |
| Label bb7 = new Label(); |
| Label bb8 = new Label(); |
| Label bb9 = new Label(); |
| Label bb10 = new Label(); |
| Label bb11 = new Label(); |
| Label bb12 = new Label(); |
| Label bb14 = new Label(); |
| |
| Register gpr1 = asRegister(gpr1Temp, AMD64Kind.QWORD); |
| Register gpr2 = asRegister(gpr2Temp, AMD64Kind.QWORD); |
| Register gpr3 = asRegister(rcxTemp, AMD64Kind.QWORD); |
| Register gpr4 = asRegister(gpr4Temp, AMD64Kind.QWORD); |
| Register gpr5 = asRegister(gpr5Temp, AMD64Kind.QWORD); |
| |
| Register temp1 = asRegister(xmm1Temp, AMD64Kind.DOUBLE); |
| Register temp2 = asRegister(xmm2Temp, AMD64Kind.DOUBLE); |
| Register temp3 = asRegister(xmm3Temp, AMD64Kind.DOUBLE); |
| Register temp4 = asRegister(xmm4Temp, AMD64Kind.DOUBLE); |
| Register temp5 = asRegister(xmm5Temp, AMD64Kind.DOUBLE); |
| Register temp6 = asRegister(xmm6Temp, AMD64Kind.DOUBLE); |
| Register temp7 = asRegister(xmm7Temp, AMD64Kind.DOUBLE); |
| Register temp8 = asRegister(xmm8Temp, AMD64Kind.DOUBLE); |
| Register temp9 = asRegister(xmm9Temp, AMD64Kind.DOUBLE); |
| Register temp10 = asRegister(xmm10Temp, AMD64Kind.DOUBLE); |
| |
| AMD64Address stackSlot = (AMD64Address) crb.asAddress(stackTemp); |
| |
| setCrb(crb); |
| masm.movsd(stackSlot, value); |
| if (dest.encoding != value.encoding) { |
| masm.movdqu(dest, value); |
| } |
| |
| masm.movdqu(temp9, externalAddress(mMaskExpPtr)); // 0xffffffc0, |
| // 0x00000000, |
| // 0xffffffc0, |
| // 0x00000000 |
| masm.movdqu(temp10, externalAddress(biasExpPtr)); // 0x0000ffc0, |
| // 0x00000000, |
| // 0x0000ffc0, |
| // 0x00000000 |
| masm.unpcklpd(dest, dest); |
| masm.leaq(gpr5, stackSlot); |
| masm.leaq(gpr2, externalAddress(cvExpPtr)); |
| masm.movdqu(temp1, new AMD64Address(gpr2, 0)); // 0x652b82fe, |
| // 0x40571547, |
| // 0x652b82fe, |
| // 0x40571547 |
| masm.movdqu(temp6, externalAddress(shifterExpPtr)); // 0x00000000, |
| // 0x43380000, |
| // 0x00000000, |
| // 0x43380000 |
| masm.movdqu(temp2, new AMD64Address(gpr2, 16)); // 0xfefa0000, |
| // 0x3f862e42, |
| // 0xfefa0000, |
| // 0x3f862e42 |
| masm.movdqu(temp3, new AMD64Address(gpr2, 32)); // 0xbc9e3b3a, |
| // 0x3d1cf79a, |
| // 0xbc9e3b3a, |
| // 0x3d1cf79a |
| masm.pextrw(gpr1, dest, 3); |
| masm.andl(gpr1, 32767); |
| masm.movl(gpr4, 16527); |
| masm.subl(gpr4, gpr1); |
| masm.subl(gpr1, 15504); |
| masm.orl(gpr4, gpr1); |
| masm.cmpl(gpr4, Integer.MIN_VALUE); |
| masm.jcc(ConditionFlag.AboveEqual, bb0); |
| |
| masm.leaq(gpr4, externalAddress(tblAddrExpPtr)); |
| masm.movdqu(temp8, new AMD64Address(gpr2, 48)); // 0xfffffffe, |
| // 0x3fdfffff, |
| // 0xfffffffe, |
| // 0x3fdfffff |
| masm.movdqu(temp4, new AMD64Address(gpr2, 64)); // 0xe3289860, |
| // 0x3f56c15c, |
| // 0x555b9e25, |
| // 0x3fa55555 |
| masm.movdqu(temp5, new AMD64Address(gpr2, 80)); // 0xc090cf0f, |
| // 0x3f811115, |
| // 0x55548ba1, |
| // 0x3fc55555 |
| masm.mulpd(temp1, dest); |
| masm.addpd(temp1, temp6); |
| masm.movapd(temp7, temp1); |
| masm.movdl(gpr1, temp1); |
| masm.pand(temp7, temp9); |
| masm.subpd(temp1, temp6); |
| masm.mulpd(temp2, temp1); |
| masm.mulpd(temp3, temp1); |
| masm.paddq(temp7, temp10); |
| masm.subpd(dest, temp2); |
| masm.movl(gpr3, gpr1); |
| masm.andl(gpr3, 63); |
| masm.shll(gpr3, 4); |
| masm.movdqu(temp2, new AMD64Address(gpr3, gpr4, Scale.Times1, 0)); |
| masm.sarl(gpr1, 6); |
| masm.psllq(temp7, 46); |
| masm.subpd(dest, temp3); |
| masm.mulpd(temp4, dest); |
| masm.movl(gpr4, gpr1); |
| masm.movapd(temp6, dest); |
| masm.movapd(temp1, dest); |
| masm.mulpd(temp6, temp6); |
| masm.mulpd(dest, temp6); |
| masm.addpd(temp5, temp4); |
| masm.mulsd(dest, temp6); |
| masm.mulpd(temp6, temp8); |
| masm.addsd(temp1, temp2); |
| masm.unpckhpd(temp2, temp2); |
| masm.mulpd(dest, temp5); |
| masm.addsd(temp1, dest); |
| masm.por(temp2, temp7); |
| masm.unpckhpd(dest, dest); |
| masm.addsd(dest, temp1); |
| masm.addsd(dest, temp6); |
| masm.addl(gpr4, 894); |
| masm.cmpl(gpr4, 1916); |
| masm.jcc(ConditionFlag.Above, bb1); |
| |
| masm.mulsd(dest, temp2); |
| masm.addsd(dest, temp2); |
| masm.jmp(bb14); |
| |
| masm.bind(bb1); |
| masm.movdqu(temp6, externalAddress(expBiasPtr)); // 0x00000000, |
| // 0x3ff00000, |
| // 0x00000000, |
| // 0x3ff00000 |
| masm.xorpd(temp3, temp3); |
| masm.movdqu(temp4, externalAddress(allOnesExpPtr)); // 0xffffffff, |
| // 0xffffffff, |
| // 0xffffffff, |
| // 0xffffffff |
| masm.movl(gpr4, -1022); |
| masm.subl(gpr4, gpr1); |
| masm.movdl(temp5, gpr4); |
| masm.psllq(temp4, temp5); |
| masm.movl(gpr3, gpr1); |
| masm.sarl(gpr1, 1); |
| masm.pinsrw(temp3, gpr1, 3); |
| masm.psllq(temp3, 4); |
| masm.psubd(temp2, temp3); |
| masm.mulsd(dest, temp2); |
| masm.cmpl(gpr4, 52); |
| masm.jcc(ConditionFlag.Greater, bb2); |
| |
| masm.pand(temp4, temp2); |
| masm.paddd(temp3, temp6); |
| masm.subsd(temp2, temp4); |
| masm.addsd(dest, temp2); |
| masm.cmpl(gpr3, 1023); |
| masm.jcc(ConditionFlag.GreaterEqual, bb3); |
| |
| masm.pextrw(gpr3, dest, 3); |
| masm.andl(gpr3, 32768); |
| masm.orl(gpr4, gpr3); |
| masm.cmpl(gpr4, 0); |
| masm.jcc(ConditionFlag.Equal, bb4); |
| |
| masm.movapd(temp6, dest); |
| masm.addsd(dest, temp4); |
| masm.mulsd(dest, temp3); |
| masm.pextrw(gpr3, dest, 3); |
| masm.andl(gpr3, 32752); |
| masm.cmpl(gpr3, 0); |
| masm.jcc(ConditionFlag.Equal, bb5); |
| |
| masm.jmp(bb14); |
| |
| masm.bind(bb5); |
| masm.mulsd(temp6, temp3); |
| masm.mulsd(temp4, temp3); |
| masm.movdqu(dest, temp6); |
| masm.pxor(temp6, temp4); |
| masm.psrad(temp6, 31); |
| masm.pshufd(temp6, temp6, 85); |
| masm.psllq(dest, 1); |
| masm.psrlq(dest, 1); |
| masm.pxor(dest, temp6); |
| masm.psrlq(temp6, 63); |
| masm.paddq(dest, temp6); |
| masm.paddq(dest, temp4); |
| masm.jmp(bb14); |
| |
| masm.bind(bb4); |
| masm.addsd(dest, temp4); |
| masm.mulsd(dest, temp3); |
| masm.jmp(bb14); |
| |
| masm.bind(bb3); |
| masm.addsd(dest, temp4); |
| masm.mulsd(dest, temp3); |
| masm.pextrw(gpr3, dest, 3); |
| masm.andl(gpr3, 32752); |
| masm.cmpl(gpr3, 32752); |
| masm.jcc(ConditionFlag.AboveEqual, bb7); |
| |
| masm.jmp(bb14); |
| |
| masm.bind(bb2); |
| masm.paddd(temp3, temp6); |
| masm.addpd(dest, temp2); |
| masm.mulsd(dest, temp3); |
| masm.jmp(bb14); |
| |
| masm.bind(bb8); |
| masm.movsd(dest, externalAddress(xMaxExpPtr)); // 0xffffffff, |
| // 0x7fefffff |
| masm.movsd(temp8, externalAddress(xMinExpPtr)); // 0x00000000, |
| // 0x00100000 |
| masm.cmpl(gpr1, 2146435072); |
| masm.jcc(ConditionFlag.AboveEqual, bb9); |
| |
| masm.movl(gpr1, new AMD64Address(gpr5, 4)); |
| masm.cmpl(gpr1, Integer.MIN_VALUE); |
| masm.jcc(ConditionFlag.AboveEqual, bb10); |
| |
| masm.mulsd(dest, dest); |
| |
| masm.bind(bb7); |
| masm.jmp(bb14); |
| |
| masm.bind(bb10); |
| masm.mulsd(dest, temp8); |
| masm.jmp(bb14); |
| |
| masm.bind(bb9); |
| masm.movl(gpr4, stackSlot); |
| masm.cmpl(gpr1, 2146435072); |
| masm.jcc(ConditionFlag.Above, bb11); |
| |
| masm.cmpl(gpr4, 0); |
| masm.jcc(ConditionFlag.NotEqual, bb11); |
| |
| masm.movl(gpr1, new AMD64Address(gpr5, 4)); |
| masm.cmpl(gpr1, 2146435072); |
| masm.jcc(ConditionFlag.NotEqual, bb12); |
| |
| masm.movsd(dest, externalAddress(infExpPtr)); // 0x00000000, |
| // 0x7ff00000 |
| masm.jmp(bb14); |
| |
| masm.bind(bb12); |
| masm.movsd(dest, externalAddress(zeroExpPtr)); // 0x00000000, |
| // 0x00000000 |
| masm.jmp(bb14); |
| |
| masm.bind(bb11); |
| masm.movsd(dest, stackSlot); |
| masm.addsd(dest, dest); |
| masm.jmp(bb14); |
| |
| masm.bind(bb0); |
| masm.movl(gpr1, new AMD64Address(gpr5, 4)); |
| masm.andl(gpr1, 2147483647); |
| masm.cmpl(gpr1, 1083179008); |
| masm.jcc(ConditionFlag.AboveEqual, bb8); |
| |
| masm.addsd(dest, externalAddress(onePtr)); // 0x00000000, |
| // 0x3ff00000 |
| masm.bind(bb14); |
| } |
| } |