/**************************************************************************** | |
** | |
** Copyright (C) 2011 Nokia Corporation and/or its subsidiary(-ies). | |
** All rights reserved. | |
** Contact: Nokia Corporation (qt-info@nokia.com) | |
** | |
** This file is part of the QtCore module of the Qt Toolkit. | |
** | |
** $QT_BEGIN_LICENSE:LGPL$ | |
** GNU Lesser General Public License Usage | |
** This file may be used under the terms of the GNU Lesser General Public | |
** License version 2.1 as published by the Free Software Foundation and | |
** appearing in the file LICENSE.LGPL included in the packaging of this | |
** file. Please review the following information to ensure the GNU Lesser | |
** General Public License version 2.1 requirements will be met: | |
** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. | |
** | |
** In addition, as a special exception, Nokia gives you certain additional | |
** rights. These rights are described in the Nokia Qt LGPL Exception | |
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package. | |
** | |
** GNU General Public License Usage | |
** Alternatively, this file may be used under the terms of the GNU General | |
** Public License version 3.0 as published by the Free Software Foundation | |
** and appearing in the file LICENSE.GPL included in the packaging of this | |
** file. Please review the following information to ensure the GNU General | |
** Public License version 3.0 requirements will be met: | |
** http://www.gnu.org/copyleft/gpl.html. | |
** | |
** Other Usage | |
** Alternatively, this file may be used in accordance with the terms and | |
** conditions contained in a signed written agreement between you and Nokia. | |
** | |
** | |
** | |
** | |
** | |
** $QT_END_LICENSE$ | |
** | |
****************************************************************************/ | |
#include "qbytearraymatcher.h" | |
#include <limits.h> | |
QT_BEGIN_NAMESPACE | |
static inline void bm_init_skiptable(const uchar *cc, int len, uchar *skiptable) | |
{ | |
int l = qMin(len, 255); | |
memset(skiptable, l, 256*sizeof(uchar)); | |
cc += len - l; | |
while (l--) | |
skiptable[*cc++] = l; | |
} | |
static inline int bm_find(const uchar *cc, int l, int index, const uchar *puc, uint pl, | |
const uchar *skiptable) | |
{ | |
if (pl == 0) | |
return index > l ? -1 : index; | |
const uint pl_minus_one = pl - 1; | |
register const uchar *current = cc + index + pl_minus_one; | |
const uchar *end = cc + l; | |
while (current < end) { | |
uint skip = skiptable[*current]; | |
if (!skip) { | |
// possible match | |
while (skip < pl) { | |
if (*(current - skip) != puc[pl_minus_one - skip]) | |
break; | |
skip++; | |
} | |
if (skip > pl_minus_one) // we have a match | |
return (current - cc) - skip + 1; | |
// in case we don't have a match we are a bit inefficient as we only skip by one | |
// when we have the non matching char in the string. | |
if (skiptable[*(current - skip)] == pl) | |
skip = pl - skip; | |
else | |
skip = 1; | |
} | |
if (current > end - skip) | |
break; | |
current += skip; | |
} | |
return -1; // not found | |
} | |
/*! \class QByteArrayMatcher | |
\brief The QByteArrayMatcher class holds a sequence of bytes that | |
can be quickly matched in a byte array. | |
\ingroup tools | |
\ingroup string-processing | |
This class is useful when you have a sequence of bytes that you | |
want to repeatedly match against some byte arrays (perhaps in a | |
loop), or when you want to search for the same sequence of bytes | |
multiple times in the same byte array. Using a matcher object and | |
indexIn() is faster than matching a plain QByteArray with | |
QByteArray::indexOf() if repeated matching takes place. This | |
class offers no benefit if you are doing one-off byte array | |
matches. | |
Create the QByteArrayMatcher with the QByteArray you want to | |
search for. Then call indexIn() on the QByteArray that you want to | |
search. | |
\sa QByteArray, QStringMatcher | |
*/ | |
/*! | |
Constructs an empty byte array matcher that won't match anything. | |
Call setPattern() to give it a pattern to match. | |
*/ | |
QByteArrayMatcher::QByteArrayMatcher() | |
: d(0) | |
{ | |
p.p = 0; | |
p.l = 0; | |
qMemSet(p.q_skiptable, 0, sizeof(p.q_skiptable)); | |
} | |
/*! | |
Constructs a byte array matcher from \a pattern. \a pattern | |
has the given \a length. \a pattern must remain in scope, but | |
the destructor does not delete \a pattern. | |
*/ | |
QByteArrayMatcher::QByteArrayMatcher(const char *pattern, int length) | |
: d(0) | |
{ | |
p.p = reinterpret_cast<const uchar *>(pattern); | |
p.l = length; | |
bm_init_skiptable(p.p, p.l, p.q_skiptable); | |
} | |
/*! | |
Constructs a byte array matcher that will search for \a pattern. | |
Call indexIn() to perform a search. | |
*/ | |
QByteArrayMatcher::QByteArrayMatcher(const QByteArray &pattern) | |
: d(0), q_pattern(pattern) | |
{ | |
p.p = reinterpret_cast<const uchar *>(pattern.constData()); | |
p.l = pattern.size(); | |
bm_init_skiptable(p.p, p.l, p.q_skiptable); | |
} | |
/*! | |
Copies the \a other byte array matcher to this byte array matcher. | |
*/ | |
QByteArrayMatcher::QByteArrayMatcher(const QByteArrayMatcher &other) | |
: d(0) | |
{ | |
operator=(other); | |
} | |
/*! | |
Destroys the byte array matcher. | |
*/ | |
QByteArrayMatcher::~QByteArrayMatcher() | |
{ | |
} | |
/*! | |
Assigns the \a other byte array matcher to this byte array matcher. | |
*/ | |
QByteArrayMatcher &QByteArrayMatcher::operator=(const QByteArrayMatcher &other) | |
{ | |
q_pattern = other.q_pattern; | |
memcpy(&p, &other.p, sizeof(p)); | |
return *this; | |
} | |
/*! | |
Sets the byte array that this byte array matcher will search for | |
to \a pattern. | |
\sa pattern(), indexIn() | |
*/ | |
void QByteArrayMatcher::setPattern(const QByteArray &pattern) | |
{ | |
q_pattern = pattern; | |
p.p = reinterpret_cast<const uchar *>(pattern.constData()); | |
p.l = pattern.size(); | |
bm_init_skiptable(p.p, p.l, p.q_skiptable); | |
} | |
/*! | |
Searches the byte array \a ba, from byte position \a from (default | |
0, i.e. from the first byte), for the byte array pattern() that | |
was set in the constructor or in the most recent call to | |
setPattern(). Returns the position where the pattern() matched in | |
\a ba, or -1 if no match was found. | |
*/ | |
int QByteArrayMatcher::indexIn(const QByteArray &ba, int from) const | |
{ | |
if (from < 0) | |
from = 0; | |
return bm_find(reinterpret_cast<const uchar *>(ba.constData()), ba.size(), from, | |
p.p, p.l, p.q_skiptable); | |
} | |
/*! | |
Searches the char string \a str, which has length \a len, from | |
byte position \a from (default 0, i.e. from the first byte), for | |
the byte array pattern() that was set in the constructor or in the | |
most recent call to setPattern(). Returns the position where the | |
pattern() matched in \a str, or -1 if no match was found. | |
*/ | |
int QByteArrayMatcher::indexIn(const char *str, int len, int from) const | |
{ | |
if (from < 0) | |
from = 0; | |
return bm_find(reinterpret_cast<const uchar *>(str), len, from, | |
p.p, p.l, p.q_skiptable); | |
} | |
/*! | |
\fn QByteArray QByteArrayMatcher::pattern() const | |
Returns the byte array pattern that this byte array matcher will | |
search for. | |
\sa setPattern() | |
*/ | |
static int findChar(const char *str, int len, char ch, int from) | |
{ | |
const uchar *s = (const uchar *)str; | |
uchar c = (uchar)ch; | |
if (from < 0) | |
from = qMax(from + len, 0); | |
if (from < len) { | |
const uchar *n = s + from - 1; | |
const uchar *e = s + len; | |
while (++n != e) | |
if (*n == c) | |
return n - s; | |
} | |
return -1; | |
} | |
/*! \internal | |
*/ | |
static int qFindByteArrayBoyerMoore( | |
const char *haystack, int haystackLen, int haystackOffset, | |
const char *needle, int needleLen) | |
{ | |
uchar skiptable[256]; | |
bm_init_skiptable((const uchar *)needle, needleLen, skiptable); | |
if (haystackOffset < 0) | |
haystackOffset = 0; | |
return bm_find((const uchar *)haystack, haystackLen, haystackOffset, | |
(const uchar *)needle, needleLen, skiptable); | |
} | |
#define REHASH(a) \ | |
if (sl_minus_1 < sizeof(uint) * CHAR_BIT) \ | |
hashHaystack -= (a) << sl_minus_1; \ | |
hashHaystack <<= 1 | |
/*! \internal | |
*/ | |
int qFindByteArray( | |
const char *haystack0, int haystackLen, int from, | |
const char *needle, int needleLen) | |
{ | |
const int l = haystackLen; | |
const int sl = needleLen; | |
if (from < 0) | |
from += l; | |
if (uint(sl + from) > (uint)l) | |
return -1; | |
if (!sl) | |
return from; | |
if (!l) | |
return -1; | |
if (sl == 1) | |
return findChar(haystack0, haystackLen, needle[0], from); | |
/* | |
We use the Boyer-Moore algorithm in cases where the overhead | |
for the skip table should pay off, otherwise we use a simple | |
hash function. | |
*/ | |
if (l > 500 && sl > 5) | |
return qFindByteArrayBoyerMoore(haystack0, haystackLen, from, | |
needle, needleLen); | |
/* | |
We use some hashing for efficiency's sake. Instead of | |
comparing strings, we compare the hash value of str with that | |
of a part of this QString. Only if that matches, we call memcmp(). | |
*/ | |
const char *haystack = haystack0 + from; | |
const char *end = haystack0 + (l - sl); | |
const uint sl_minus_1 = sl - 1; | |
uint hashNeedle = 0, hashHaystack = 0; | |
int idx; | |
for (idx = 0; idx < sl; ++idx) { | |
hashNeedle = ((hashNeedle<<1) + needle[idx]); | |
hashHaystack = ((hashHaystack<<1) + haystack[idx]); | |
} | |
hashHaystack -= *(haystack + sl_minus_1); | |
while (haystack <= end) { | |
hashHaystack += *(haystack + sl_minus_1); | |
if (hashHaystack == hashNeedle && *needle == *haystack | |
&& memcmp(needle, haystack, sl) == 0) | |
return haystack - haystack0; | |
REHASH(*haystack); | |
++haystack; | |
} | |
return -1; | |
} | |
QT_END_NAMESPACE |