| /* |
| * Copyright (c) 2009, 2015, Oracle and/or its affiliates. All rights reserved. |
| * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| * |
| * This code is free software; you can redistribute it and/or modify it |
| * under the terms of the GNU General Public License version 2 only, as |
| * published by the Free Software Foundation. |
| * |
| * This code is distributed in the hope that it will be useful, but WITHOUT |
| * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| * version 2 for more details (a copy is included in the LICENSE file that |
| * accompanied this code). |
| * |
| * You should have received a copy of the GNU General Public License version |
| * 2 along with this work; if not, write to the Free Software Foundation, |
| * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| * |
| * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
| * or visit www.oracle.com if you need additional information or have any |
| * questions. |
| */ |
| |
| |
| package org.graalvm.compiler.asm.amd64; |
| |
| import static jdk.vm.ci.amd64.AMD64.rax; |
| import static jdk.vm.ci.amd64.AMD64.rcx; |
| import static jdk.vm.ci.amd64.AMD64.rdx; |
| import static jdk.vm.ci.amd64.AMD64.rsp; |
| import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseIncDec; |
| import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseXmmLoadAndClearUpper; |
| import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseXmmRegToRegMoveAll; |
| |
| import org.graalvm.compiler.asm.Label; |
| import org.graalvm.compiler.asm.amd64.AMD64Address.Scale; |
| import org.graalvm.compiler.core.common.NumUtil; |
| |
| import jdk.vm.ci.amd64.AMD64; |
| import jdk.vm.ci.amd64.AMD64Kind; |
| import jdk.vm.ci.code.Register; |
| import jdk.vm.ci.code.TargetDescription; |
| |
| /** |
| * This class implements commonly used X86 code patterns. |
| */ |
| public class AMD64MacroAssembler extends AMD64Assembler { |
| |
| public AMD64MacroAssembler(TargetDescription target) { |
| super(target); |
| } |
| |
| public final void decrementq(Register reg, int value) { |
| if (value == Integer.MIN_VALUE) { |
| subq(reg, value); |
| return; |
| } |
| if (value < 0) { |
| incrementq(reg, -value); |
| return; |
| } |
| if (value == 0) { |
| return; |
| } |
| if (value == 1 && UseIncDec) { |
| decq(reg); |
| } else { |
| subq(reg, value); |
| } |
| } |
| |
| public final void decrementq(AMD64Address dst, int value) { |
| if (value == Integer.MIN_VALUE) { |
| subq(dst, value); |
| return; |
| } |
| if (value < 0) { |
| incrementq(dst, -value); |
| return; |
| } |
| if (value == 0) { |
| return; |
| } |
| if (value == 1 && UseIncDec) { |
| decq(dst); |
| } else { |
| subq(dst, value); |
| } |
| } |
| |
| public void incrementq(Register reg, int value) { |
| if (value == Integer.MIN_VALUE) { |
| addq(reg, value); |
| return; |
| } |
| if (value < 0) { |
| decrementq(reg, -value); |
| return; |
| } |
| if (value == 0) { |
| return; |
| } |
| if (value == 1 && UseIncDec) { |
| incq(reg); |
| } else { |
| addq(reg, value); |
| } |
| } |
| |
| public final void incrementq(AMD64Address dst, int value) { |
| if (value == Integer.MIN_VALUE) { |
| addq(dst, value); |
| return; |
| } |
| if (value < 0) { |
| decrementq(dst, -value); |
| return; |
| } |
| if (value == 0) { |
| return; |
| } |
| if (value == 1 && UseIncDec) { |
| incq(dst); |
| } else { |
| addq(dst, value); |
| } |
| } |
| |
| public final void movptr(Register dst, AMD64Address src) { |
| movq(dst, src); |
| } |
| |
| public final void movptr(AMD64Address dst, Register src) { |
| movq(dst, src); |
| } |
| |
| public final void movptr(AMD64Address dst, int src) { |
| movslq(dst, src); |
| } |
| |
| public final void cmpptr(Register src1, Register src2) { |
| cmpq(src1, src2); |
| } |
| |
| public final void cmpptr(Register src1, AMD64Address src2) { |
| cmpq(src1, src2); |
| } |
| |
| public final void decrementl(Register reg) { |
| decrementl(reg, 1); |
| } |
| |
| public final void decrementl(Register reg, int value) { |
| if (value == Integer.MIN_VALUE) { |
| subl(reg, value); |
| return; |
| } |
| if (value < 0) { |
| incrementl(reg, -value); |
| return; |
| } |
| if (value == 0) { |
| return; |
| } |
| if (value == 1 && UseIncDec) { |
| decl(reg); |
| } else { |
| subl(reg, value); |
| } |
| } |
| |
| public final void decrementl(AMD64Address dst, int value) { |
| if (value == Integer.MIN_VALUE) { |
| subl(dst, value); |
| return; |
| } |
| if (value < 0) { |
| incrementl(dst, -value); |
| return; |
| } |
| if (value == 0) { |
| return; |
| } |
| if (value == 1 && UseIncDec) { |
| decl(dst); |
| } else { |
| subl(dst, value); |
| } |
| } |
| |
| public final void incrementl(Register reg, int value) { |
| if (value == Integer.MIN_VALUE) { |
| addl(reg, value); |
| return; |
| } |
| if (value < 0) { |
| decrementl(reg, -value); |
| return; |
| } |
| if (value == 0) { |
| return; |
| } |
| if (value == 1 && UseIncDec) { |
| incl(reg); |
| } else { |
| addl(reg, value); |
| } |
| } |
| |
| public final void incrementl(AMD64Address dst, int value) { |
| if (value == Integer.MIN_VALUE) { |
| addl(dst, value); |
| return; |
| } |
| if (value < 0) { |
| decrementl(dst, -value); |
| return; |
| } |
| if (value == 0) { |
| return; |
| } |
| if (value == 1 && UseIncDec) { |
| incl(dst); |
| } else { |
| addl(dst, value); |
| } |
| } |
| |
| public void movflt(Register dst, Register src) { |
| assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); |
| if (UseXmmRegToRegMoveAll) { |
| movaps(dst, src); |
| } else { |
| movss(dst, src); |
| } |
| } |
| |
| public void movflt(Register dst, AMD64Address src) { |
| assert dst.getRegisterCategory().equals(AMD64.XMM); |
| movss(dst, src); |
| } |
| |
| public void movflt(AMD64Address dst, Register src) { |
| assert src.getRegisterCategory().equals(AMD64.XMM); |
| movss(dst, src); |
| } |
| |
| public void movdbl(Register dst, Register src) { |
| assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); |
| if (UseXmmRegToRegMoveAll) { |
| movapd(dst, src); |
| } else { |
| movsd(dst, src); |
| } |
| } |
| |
| public void movdbl(Register dst, AMD64Address src) { |
| assert dst.getRegisterCategory().equals(AMD64.XMM); |
| if (UseXmmLoadAndClearUpper) { |
| movsd(dst, src); |
| } else { |
| movlpd(dst, src); |
| } |
| } |
| |
| public void movdbl(AMD64Address dst, Register src) { |
| assert src.getRegisterCategory().equals(AMD64.XMM); |
| movsd(dst, src); |
| } |
| |
| /** |
| * Non-atomic write of a 64-bit constant to memory. Do not use if the address might be a |
| * volatile field! |
| */ |
| public final void movlong(AMD64Address dst, long src) { |
| if (NumUtil.isInt(src)) { |
| AMD64MIOp.MOV.emit(this, OperandSize.QWORD, dst, (int) src); |
| } else { |
| AMD64Address high = new AMD64Address(dst.getBase(), dst.getIndex(), dst.getScale(), dst.getDisplacement() + 4); |
| movl(dst, (int) (src & 0xFFFFFFFF)); |
| movl(high, (int) (src >> 32)); |
| } |
| |
| } |
| |
| public final void setl(ConditionFlag cc, Register dst) { |
| setb(cc, dst); |
| movzbl(dst, dst); |
| } |
| |
| public final void setq(ConditionFlag cc, Register dst) { |
| setb(cc, dst); |
| movzbq(dst, dst); |
| } |
| |
| public final void flog(Register dest, Register value, boolean base10) { |
| if (base10) { |
| fldlg2(); |
| } else { |
| fldln2(); |
| } |
| AMD64Address tmp = trigPrologue(value); |
| fyl2x(); |
| trigEpilogue(dest, tmp); |
| } |
| |
| public final void fsin(Register dest, Register value) { |
| AMD64Address tmp = trigPrologue(value); |
| fsin(); |
| trigEpilogue(dest, tmp); |
| } |
| |
| public final void fcos(Register dest, Register value) { |
| AMD64Address tmp = trigPrologue(value); |
| fcos(); |
| trigEpilogue(dest, tmp); |
| } |
| |
| public final void ftan(Register dest, Register value) { |
| AMD64Address tmp = trigPrologue(value); |
| fptan(); |
| fstp(0); // ftan pushes 1.0 in addition to the actual result, pop |
| trigEpilogue(dest, tmp); |
| } |
| |
| public final void fpop() { |
| ffree(0); |
| fincstp(); |
| } |
| |
| private AMD64Address trigPrologue(Register value) { |
| assert value.getRegisterCategory().equals(AMD64.XMM); |
| AMD64Address tmp = new AMD64Address(AMD64.rsp); |
| subq(AMD64.rsp, AMD64Kind.DOUBLE.getSizeInBytes()); |
| movdbl(tmp, value); |
| fldd(tmp); |
| return tmp; |
| } |
| |
| private void trigEpilogue(Register dest, AMD64Address tmp) { |
| assert dest.getRegisterCategory().equals(AMD64.XMM); |
| fstpd(tmp); |
| movdbl(dest, tmp); |
| addq(AMD64.rsp, AMD64Kind.DOUBLE.getSizeInBytes()); |
| } |
| |
| // IndexOf for constant substrings with size >= 8 chars |
| // which don't need to be loaded through stack. |
| public void stringIndexofC8(Register str1, Register str2, |
| Register cnt1, Register cnt2, |
| int intCnt2, Register result, |
| Register vec, Register tmp) { |
| // assert(UseSSE42Intrinsics, "SSE4.2 is required"); |
| |
| // This method uses pcmpestri inxtruction with bound registers |
| // inputs: |
| // xmm - substring |
| // rax - substring length (elements count) |
| // mem - scanned string |
| // rdx - string length (elements count) |
| // 0xd - mode: 1100 (substring search) + 01 (unsigned shorts) |
| // outputs: |
| // rcx - matched index in string |
| assert cnt1.equals(rdx) && cnt2.equals(rax) && tmp.equals(rcx) : "pcmpestri"; |
| |
| Label reloadSubstr = new Label(); |
| Label scanToSubstr = new Label(); |
| Label scanSubstr = new Label(); |
| Label retFound = new Label(); |
| Label retNotFound = new Label(); |
| Label exit = new Label(); |
| Label foundSubstr = new Label(); |
| Label matchSubstrHead = new Label(); |
| Label reloadStr = new Label(); |
| Label foundCandidate = new Label(); |
| |
| // Note, inline_string_indexOf() generates checks: |
| // if (substr.count > string.count) return -1; |
| // if (substr.count == 0) return 0; |
| assert intCnt2 >= 8 : "this code isused only for cnt2 >= 8 chars"; |
| |
| // Load substring. |
| movdqu(vec, new AMD64Address(str2, 0)); |
| movl(cnt2, intCnt2); |
| movq(result, str1); // string addr |
| |
| if (intCnt2 > 8) { |
| jmpb(scanToSubstr); |
| |
| // Reload substr for rescan, this code |
| // is executed only for large substrings (> 8 chars) |
| bind(reloadSubstr); |
| movdqu(vec, new AMD64Address(str2, 0)); |
| negq(cnt2); // Jumped here with negative cnt2, convert to positive |
| |
| bind(reloadStr); |
| // We came here after the beginning of the substring was |
| // matched but the rest of it was not so we need to search |
| // again. Start from the next element after the previous match. |
| |
| // cnt2 is number of substring reminding elements and |
| // cnt1 is number of string reminding elements when cmp failed. |
| // Restored cnt1 = cnt1 - cnt2 + int_cnt2 |
| subl(cnt1, cnt2); |
| addl(cnt1, intCnt2); |
| movl(cnt2, intCnt2); // Now restore cnt2 |
| |
| decrementl(cnt1, 1); // Shift to next element |
| cmpl(cnt1, cnt2); |
| jccb(ConditionFlag.Negative, retNotFound); // Left less then substring |
| |
| addq(result, 2); |
| |
| } // (int_cnt2 > 8) |
| |
| // Scan string for start of substr in 16-byte vectors |
| bind(scanToSubstr); |
| pcmpestri(vec, new AMD64Address(result, 0), 0x0d); |
| jccb(ConditionFlag.Below, foundCandidate); // CF == 1 |
| subl(cnt1, 8); |
| jccb(ConditionFlag.LessEqual, retNotFound); // Scanned full string |
| cmpl(cnt1, cnt2); |
| jccb(ConditionFlag.Negative, retNotFound); // Left less then substring |
| addq(result, 16); |
| jmpb(scanToSubstr); |
| |
| // Found a potential substr |
| bind(foundCandidate); |
| // Matched whole vector if first element matched (tmp(rcx) == 0). |
| if (intCnt2 == 8) { |
| jccb(ConditionFlag.Overflow, retFound); // OF == 1 |
| } else { // int_cnt2 > 8 |
| jccb(ConditionFlag.Overflow, foundSubstr); |
| } |
| // After pcmpestri tmp(rcx) contains matched element index |
| // Compute start addr of substr |
| leaq(result, new AMD64Address(result, tmp, Scale.Times2, 0)); |
| |
| // Make sure string is still long enough |
| subl(cnt1, tmp); |
| cmpl(cnt1, cnt2); |
| if (intCnt2 == 8) { |
| jccb(ConditionFlag.GreaterEqual, scanToSubstr); |
| } else { // int_cnt2 > 8 |
| jccb(ConditionFlag.GreaterEqual, matchSubstrHead); |
| } |
| // Left less then substring. |
| |
| bind(retNotFound); |
| movl(result, -1); |
| jmpb(exit); |
| |
| if (intCnt2 > 8) { |
| // This code is optimized for the case when whole substring |
| // is matched if its head is matched. |
| bind(matchSubstrHead); |
| pcmpestri(vec, new AMD64Address(result, 0), 0x0d); |
| // Reload only string if does not match |
| jccb(ConditionFlag.NoOverflow, reloadStr); // OF == 0 |
| |
| Label contScanSubstr = new Label(); |
| // Compare the rest of substring (> 8 chars). |
| bind(foundSubstr); |
| // First 8 chars are already matched. |
| negq(cnt2); |
| addq(cnt2, 8); |
| |
| bind(scanSubstr); |
| subl(cnt1, 8); |
| cmpl(cnt2, -8); // Do not read beyond substring |
| jccb(ConditionFlag.LessEqual, contScanSubstr); |
| // Back-up strings to avoid reading beyond substring: |
| // cnt1 = cnt1 - cnt2 + 8 |
| addl(cnt1, cnt2); // cnt2 is negative |
| addl(cnt1, 8); |
| movl(cnt2, 8); |
| negq(cnt2); |
| bind(contScanSubstr); |
| if (intCnt2 < 1024 * 1024 * 1024) { |
| movdqu(vec, new AMD64Address(str2, cnt2, Scale.Times2, intCnt2 * 2)); |
| pcmpestri(vec, new AMD64Address(result, cnt2, Scale.Times2, intCnt2 * 2), 0x0d); |
| } else { |
| // calculate index in register to avoid integer overflow (int_cnt2*2) |
| movl(tmp, intCnt2); |
| addq(tmp, cnt2); |
| movdqu(vec, new AMD64Address(str2, tmp, Scale.Times2, 0)); |
| pcmpestri(vec, new AMD64Address(result, tmp, Scale.Times2, 0), 0x0d); |
| } |
| // Need to reload strings pointers if not matched whole vector |
| jcc(ConditionFlag.NoOverflow, reloadSubstr); // OF == 0 |
| addq(cnt2, 8); |
| jcc(ConditionFlag.Negative, scanSubstr); |
| // Fall through if found full substring |
| |
| } // (int_cnt2 > 8) |
| |
| bind(retFound); |
| // Found result if we matched full small substring. |
| // Compute substr offset |
| subq(result, str1); |
| shrl(result, 1); // index |
| bind(exit); |
| |
| } // string_indexofC8 |
| |
| // Small strings are loaded through stack if they cross page boundary. |
| public void stringIndexOf(Register str1, Register str2, |
| Register cnt1, Register cnt2, |
| int intCnt2, Register result, |
| Register vec, Register tmp, int vmPageSize) { |
| // |
| // int_cnt2 is length of small (< 8 chars) constant substring |
| // or (-1) for non constant substring in which case its length |
| // is in cnt2 register. |
| // |
| // Note, inline_string_indexOf() generates checks: |
| // if (substr.count > string.count) return -1; |
| // if (substr.count == 0) return 0; |
| // |
| assert intCnt2 == -1 || (0 < intCnt2 && intCnt2 < 8) : "should be != 0"; |
| |
| // This method uses pcmpestri instruction with bound registers |
| // inputs: |
| // xmm - substring |
| // rax - substring length (elements count) |
| // mem - scanned string |
| // rdx - string length (elements count) |
| // 0xd - mode: 1100 (substring search) + 01 (unsigned shorts) |
| // outputs: |
| // rcx - matched index in string |
| assert cnt1.equals(rdx) && cnt2.equals(rax) && tmp.equals(rcx) : "pcmpestri"; |
| |
| Label reloadSubstr = new Label(); |
| Label scanToSubstr = new Label(); |
| Label scanSubstr = new Label(); |
| Label adjustStr = new Label(); |
| Label retFound = new Label(); |
| Label retNotFound = new Label(); |
| Label cleanup = new Label(); |
| Label foundSubstr = new Label(); |
| Label foundCandidate = new Label(); |
| |
| int wordSize = 8; |
| // We don't know where these strings are located |
| // and we can't read beyond them. Load them through stack. |
| Label bigStrings = new Label(); |
| Label checkStr = new Label(); |
| Label copySubstr = new Label(); |
| Label copyStr = new Label(); |
| |
| movq(tmp, rsp); // save old SP |
| |
| if (intCnt2 > 0) { // small (< 8 chars) constant substring |
| if (intCnt2 == 1) { // One char |
| movzwl(result, new AMD64Address(str2, 0)); |
| movdl(vec, result); // move 32 bits |
| } else if (intCnt2 == 2) { // Two chars |
| movdl(vec, new AMD64Address(str2, 0)); // move 32 bits |
| } else if (intCnt2 == 4) { // Four chars |
| movq(vec, new AMD64Address(str2, 0)); // move 64 bits |
| } else { // cnt2 = { 3, 5, 6, 7 } |
| // Array header size is 12 bytes in 32-bit VM |
| // + 6 bytes for 3 chars == 18 bytes, |
| // enough space to load vec and shift. |
| movdqu(vec, new AMD64Address(str2, (intCnt2 * 2) - 16)); |
| psrldq(vec, 16 - (intCnt2 * 2)); |
| } |
| } else { // not constant substring |
| cmpl(cnt2, 8); |
| jccb(ConditionFlag.AboveEqual, bigStrings); // Both strings are big enough |
| |
| // We can read beyond string if str+16 does not cross page boundary |
| // since heaps are aligned and mapped by pages. |
| assert vmPageSize < 1024 * 1024 * 1024 : "default page should be small"; |
| movl(result, str2); // We need only low 32 bits |
| andl(result, (vmPageSize - 1)); |
| cmpl(result, (vmPageSize - 16)); |
| jccb(ConditionFlag.BelowEqual, checkStr); |
| |
| // Move small strings to stack to allow load 16 bytes into vec. |
| subq(rsp, 16); |
| int stackOffset = wordSize - 2; |
| push(cnt2); |
| |
| bind(copySubstr); |
| movzwl(result, new AMD64Address(str2, cnt2, Scale.Times2, -2)); |
| movw(new AMD64Address(rsp, cnt2, Scale.Times2, stackOffset), result); |
| decrementl(cnt2, 1); |
| jccb(ConditionFlag.NotZero, copySubstr); |
| |
| pop(cnt2); |
| movq(str2, rsp); // New substring address |
| } // non constant |
| |
| bind(checkStr); |
| cmpl(cnt1, 8); |
| jccb(ConditionFlag.AboveEqual, bigStrings); |
| |
| // Check cross page boundary. |
| movl(result, str1); // We need only low 32 bits |
| andl(result, (vmPageSize - 1)); |
| cmpl(result, (vmPageSize - 16)); |
| jccb(ConditionFlag.BelowEqual, bigStrings); |
| |
| subq(rsp, 16); |
| int stackOffset = -2; |
| if (intCnt2 < 0) { // not constant |
| push(cnt2); |
| stackOffset += wordSize; |
| } |
| movl(cnt2, cnt1); |
| |
| bind(copyStr); |
| movzwl(result, new AMD64Address(str1, cnt2, Scale.Times2, -2)); |
| movw(new AMD64Address(rsp, cnt2, Scale.Times2, stackOffset), result); |
| decrementl(cnt2, 1); |
| jccb(ConditionFlag.NotZero, copyStr); |
| |
| if (intCnt2 < 0) { // not constant |
| pop(cnt2); |
| } |
| movq(str1, rsp); // New string address |
| |
| bind(bigStrings); |
| // Load substring. |
| if (intCnt2 < 0) { // -1 |
| movdqu(vec, new AMD64Address(str2, 0)); |
| push(cnt2); // substr count |
| push(str2); // substr addr |
| push(str1); // string addr |
| } else { |
| // Small (< 8 chars) constant substrings are loaded already. |
| movl(cnt2, intCnt2); |
| } |
| push(tmp); // original SP |
| // Finished loading |
| |
| // ======================================================== |
| // Start search |
| // |
| |
| movq(result, str1); // string addr |
| |
| if (intCnt2 < 0) { // Only for non constant substring |
| jmpb(scanToSubstr); |
| |
| // SP saved at sp+0 |
| // String saved at sp+1*wordSize |
| // Substr saved at sp+2*wordSize |
| // Substr count saved at sp+3*wordSize |
| |
| // Reload substr for rescan, this code |
| // is executed only for large substrings (> 8 chars) |
| bind(reloadSubstr); |
| movq(str2, new AMD64Address(rsp, 2 * wordSize)); |
| movl(cnt2, new AMD64Address(rsp, 3 * wordSize)); |
| movdqu(vec, new AMD64Address(str2, 0)); |
| // We came here after the beginning of the substring was |
| // matched but the rest of it was not so we need to search |
| // again. Start from the next element after the previous match. |
| subq(str1, result); // Restore counter |
| shrl(str1, 1); |
| addl(cnt1, str1); |
| decrementl(cnt1); // Shift to next element |
| cmpl(cnt1, cnt2); |
| jccb(ConditionFlag.Negative, retNotFound); // Left less then substring |
| |
| addq(result, 2); |
| } // non constant |
| |
| // Scan string for start of substr in 16-byte vectors |
| bind(scanToSubstr); |
| assert cnt1.equals(rdx) && cnt2.equals(rax) && tmp.equals(rcx) : "pcmpestri"; |
| pcmpestri(vec, new AMD64Address(result, 0), 0x0d); |
| jccb(ConditionFlag.Below, foundCandidate); // CF == 1 |
| subl(cnt1, 8); |
| jccb(ConditionFlag.LessEqual, retNotFound); // Scanned full string |
| cmpl(cnt1, cnt2); |
| jccb(ConditionFlag.Negative, retNotFound); // Left less then substring |
| addq(result, 16); |
| |
| bind(adjustStr); |
| cmpl(cnt1, 8); // Do not read beyond string |
| jccb(ConditionFlag.GreaterEqual, scanToSubstr); |
| // Back-up string to avoid reading beyond string. |
| leaq(result, new AMD64Address(result, cnt1, Scale.Times2, -16)); |
| movl(cnt1, 8); |
| jmpb(scanToSubstr); |
| |
| // Found a potential substr |
| bind(foundCandidate); |
| // After pcmpestri tmp(rcx) contains matched element index |
| |
| // Make sure string is still long enough |
| subl(cnt1, tmp); |
| cmpl(cnt1, cnt2); |
| jccb(ConditionFlag.GreaterEqual, foundSubstr); |
| // Left less then substring. |
| |
| bind(retNotFound); |
| movl(result, -1); |
| jmpb(cleanup); |
| |
| bind(foundSubstr); |
| // Compute start addr of substr |
| leaq(result, new AMD64Address(result, tmp, Scale.Times2)); |
| |
| if (intCnt2 > 0) { // Constant substring |
| // Repeat search for small substring (< 8 chars) |
| // from new point without reloading substring. |
| // Have to check that we don't read beyond string. |
| cmpl(tmp, 8 - intCnt2); |
| jccb(ConditionFlag.Greater, adjustStr); |
| // Fall through if matched whole substring. |
| } else { // non constant |
| assert intCnt2 == -1 : "should be != 0"; |
| |
| addl(tmp, cnt2); |
| // Found result if we matched whole substring. |
| cmpl(tmp, 8); |
| jccb(ConditionFlag.LessEqual, retFound); |
| |
| // Repeat search for small substring (<= 8 chars) |
| // from new point 'str1' without reloading substring. |
| cmpl(cnt2, 8); |
| // Have to check that we don't read beyond string. |
| jccb(ConditionFlag.LessEqual, adjustStr); |
| |
| Label checkNext = new Label(); |
| Label contScanSubstr = new Label(); |
| Label retFoundLong = new Label(); |
| // Compare the rest of substring (> 8 chars). |
| movq(str1, result); |
| |
| cmpl(tmp, cnt2); |
| // First 8 chars are already matched. |
| jccb(ConditionFlag.Equal, checkNext); |
| |
| bind(scanSubstr); |
| pcmpestri(vec, new AMD64Address(str1, 0), 0x0d); |
| // Need to reload strings pointers if not matched whole vector |
| jcc(ConditionFlag.NoOverflow, reloadSubstr); // OF == 0 |
| |
| bind(checkNext); |
| subl(cnt2, 8); |
| jccb(ConditionFlag.LessEqual, retFoundLong); // Found full substring |
| addq(str1, 16); |
| addq(str2, 16); |
| subl(cnt1, 8); |
| cmpl(cnt2, 8); // Do not read beyond substring |
| jccb(ConditionFlag.GreaterEqual, contScanSubstr); |
| // Back-up strings to avoid reading beyond substring. |
| leaq(str2, new AMD64Address(str2, cnt2, Scale.Times2, -16)); |
| leaq(str1, new AMD64Address(str1, cnt2, Scale.Times2, -16)); |
| subl(cnt1, cnt2); |
| movl(cnt2, 8); |
| addl(cnt1, 8); |
| bind(contScanSubstr); |
| movdqu(vec, new AMD64Address(str2, 0)); |
| jmpb(scanSubstr); |
| |
| bind(retFoundLong); |
| movq(str1, new AMD64Address(rsp, wordSize)); |
| } // non constant |
| |
| bind(retFound); |
| // Compute substr offset |
| subq(result, str1); |
| shrl(result, 1); // index |
| |
| bind(cleanup); |
| pop(rsp); // restore SP |
| |
| } |
| |
| } |