| /* |
| * Copyright 2000-2013 JetBrains s.r.o. |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| /* |
| * @author max |
| */ |
| package com.intellij.openapi.util.text; |
| |
| import com.intellij.util.text.CharArrayUtil; |
| import gnu.trove.TIntProcedure; |
| |
| public class TrigramBuilder { |
| private TrigramBuilder() { |
| } |
| |
| public static boolean processTrigrams(CharSequence text, TrigramProcessor consumer) { |
| final AddonlyIntSet set = new AddonlyIntSet(); |
| int index = 0; |
| final char[] fileTextArray = CharArrayUtil.fromSequenceWithoutCopying(text); |
| |
| ScanWordsLoop: |
| while (true) { |
| while (true) { |
| if (index == text.length()) break ScanWordsLoop; |
| final char c = fileTextArray != null ? fileTextArray[index]:text.charAt(index); |
| if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || |
| Character.isJavaIdentifierPart(c)) { |
| break; |
| } |
| index++; |
| } |
| int identifierStart = index; |
| while (true) { |
| index++; |
| if (index == text.length()) break; |
| final char c = fileTextArray != null ? fileTextArray[index]:text.charAt(index); |
| if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9')) continue; |
| if (!Character.isJavaIdentifierPart(c)) break; |
| } |
| |
| int tc1 = 0; |
| int tc2 = 0; |
| int tc3; |
| for (int i = identifierStart, iters = 0; i < index; ++i, ++iters) { |
| char c = StringUtil.toLowerCase(fileTextArray != null ? fileTextArray[i]:text.charAt(i)); |
| tc3 = (tc2 << 8) + c; |
| tc2 = (tc1 << 8) + c; |
| tc1 = c; |
| |
| if (iters >= 2) { |
| set.add(tc3); |
| } |
| } |
| } |
| |
| return consumer.consumeTrigramsCount(set.size()) && set.forEach(consumer); |
| } |
| |
| public static abstract class TrigramProcessor implements TIntProcedure { |
| public boolean consumeTrigramsCount(int count) { return true; } |
| } |
| } |
| |
| class AddonlyIntSet { |
| //private static final int MAGIC = 0x9E3779B9; |
| private int size; |
| private int[] data; |
| private int shift; |
| private int mask; |
| private boolean hasZeroKey; |
| |
| public AddonlyIntSet() { |
| this(21); |
| } |
| |
| public AddonlyIntSet(int expectedSize) { |
| int powerOfTwo = Integer.highestOneBit((3 * expectedSize) / 2) << 1; |
| shift = Integer.numberOfLeadingZeros(powerOfTwo) + 1; |
| mask = powerOfTwo - 1; |
| data = new int[powerOfTwo]; |
| } |
| |
| public int size() { |
| return size; |
| } |
| |
| private int hash(int h, int[] a) { |
| h ^= (h >>> 20) ^ (h >>> 12); |
| return (h ^ (h >>> 7) ^ (h >>> 4)) & mask; |
| //int idx = (id * MAGIC) >>> shift; |
| //if (idx >= a.length) { |
| // idx %= a.length; |
| //} |
| //return idx; |
| } |
| |
| public void add(int key) { |
| if (key == 0) { |
| if (!hasZeroKey) ++size; |
| hasZeroKey = true; |
| return; |
| } |
| if (size >= (2 * data.length) / 3) rehash(); |
| if (doPut(data, key)) size++; |
| } |
| |
| private boolean doPut(int[] a, int o) { |
| int index = hash(o, a); |
| int obj; |
| while ((obj = a[index]) != 0) { |
| if (obj == o) break; |
| if (index == 0) index = a.length; |
| index--; |
| } |
| a[index] = o; |
| return obj == 0; |
| } |
| |
| private void rehash() { |
| --shift; |
| int[] b = new int[data.length << 1]; |
| mask = b.length - 1; |
| for (int i = data.length; --i >= 0;) { |
| int ns = data[i]; |
| if (ns != 0) doPut(b, ns); |
| } |
| data = b; |
| } |
| |
| public boolean contains(int key) { |
| if (key == 0) return hasZeroKey; |
| int index = hash(key, data); |
| int v; |
| while ((v = data[index]) != 0) { |
| if (v == key) return true; |
| if (index == 0) index = data.length; |
| index--; |
| } |
| return false; |
| } |
| |
| public boolean forEach(TIntProcedure consumer) { |
| if (hasZeroKey && !consumer.execute(0)) return false; |
| for(int o:data) { |
| if (o == 0) continue; |
| if(!consumer.execute(o)) return false; |
| } |
| return true; |
| } |
| } |
| |