/*------------------------------------------------------------------------------ | |
* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team | |
* | |
* Distributable under the terms of either the Apache License (Version 2.0) or | |
* the GNU Lesser General Public License, as specified in the COPYING file. | |
------------------------------------------------------------------------------*/ | |
#include "CLucene/StdHeader.h" | |
#include "WildcardTermEnum.h" | |
CL_NS_USE(index) | |
CL_NS_DEF(search) | |
bool WildcardTermEnum::termCompare(Term* term) { | |
if ( term!=NULL && __term->field() == term->field() ) { | |
const TCHAR* searchText = term->text(); | |
const TCHAR* patternText = __term->text(); | |
if ( _tcsncmp( searchText, pre, preLen ) == 0 ){ | |
return wildcardEquals(patternText+preLen, __term->textLength()-preLen, 0, searchText, term->textLength(), preLen); | |
} | |
} | |
_endEnum = true; | |
return false; | |
} | |
/** Creates new WildcardTermEnum */ | |
WildcardTermEnum::WildcardTermEnum(IndexReader* reader, Term* term): | |
FilteredTermEnum(), | |
__term(_CL_POINTER(term)), | |
fieldMatch(false), | |
_endEnum(false) | |
{ | |
pre = stringDuplicate(term->text()); | |
const TCHAR* sidx = _tcschr( pre, LUCENE_WILDCARDTERMENUM_WILDCARD_STRING ); | |
const TCHAR* cidx = _tcschr( pre, LUCENE_WILDCARDTERMENUM_WILDCARD_CHAR ); | |
const TCHAR* tidx = sidx; | |
if (tidx == NULL) | |
tidx = cidx; | |
else if ( cidx && cidx > pre) | |
tidx = min(sidx, cidx); | |
CND_PRECONDITION(tidx != NULL, "tidx==NULL"); | |
int32_t idx = (int32_t)(tidx - pre); | |
preLen = idx; | |
CND_PRECONDITION(preLen<term->textLength(), "preLen >= term->textLength()"); | |
pre[preLen]=0; //trim end | |
Term* t = _CLNEW Term(__term, pre); | |
setEnum( reader->terms(t) ); | |
_CLDECDELETE(t); | |
} | |
void WildcardTermEnum::close() | |
{ | |
if ( __term != NULL ){ | |
FilteredTermEnum::close(); | |
_CLDECDELETE(__term); | |
__term = NULL; | |
_CLDELETE_CARRAY( pre ); | |
} | |
} | |
WildcardTermEnum::~WildcardTermEnum() { | |
close(); | |
} | |
qreal WildcardTermEnum::difference() { | |
return 1.0f; | |
} | |
bool WildcardTermEnum::endEnum() { | |
return _endEnum; | |
} | |
bool WildcardTermEnum::wildcardEquals(const TCHAR* pattern, int32_t patternLen, int32_t patternIdx, const TCHAR* str, int32_t strLen, int32_t stringIdx) | |
{ | |
for (int32_t p = patternIdx; ; ++p) | |
{ | |
for (int32_t s = stringIdx; ; ++p, ++s) | |
{ | |
// End of str yet? | |
bool sEnd = (s >= strLen); | |
// End of pattern yet? | |
bool pEnd = (p >= patternLen); | |
// If we're looking at the end of the str... | |
if (sEnd) | |
{ | |
// Assume the only thing left on the pattern is/are wildcards | |
bool justWildcardsLeft = true; | |
// Current wildcard position | |
int32_t wildcardSearchPos = p; | |
// While we haven't found the end of the pattern, | |
// and haven't encountered any non-wildcard characters | |
while (wildcardSearchPos < patternLen && justWildcardsLeft) | |
{ | |
// Check the character at the current position | |
TCHAR wildchar = pattern[wildcardSearchPos]; | |
// If it's not a wildcard character, then there is more | |
// pattern information after this/these wildcards. | |
if (wildchar != LUCENE_WILDCARDTERMENUM_WILDCARD_CHAR && | |
wildchar != LUCENE_WILDCARDTERMENUM_WILDCARD_STRING){ | |
justWildcardsLeft = false; | |
}else{ | |
// to prevent "cat" matches "ca??" | |
if (wildchar == LUCENE_WILDCARDTERMENUM_WILDCARD_CHAR) | |
return false; | |
wildcardSearchPos++; // Look at the next character | |
} | |
} | |
// This was a prefix wildcard search, and we've matched, so | |
// return true. | |
if (justWildcardsLeft) | |
return true; | |
} | |
// If we've gone past the end of the str, or the pattern, | |
// return false. | |
if (sEnd || pEnd) | |
break; | |
// Match a single character, so continue. | |
if (pattern[p] == LUCENE_WILDCARDTERMENUM_WILDCARD_CHAR) | |
continue; | |
if (pattern[p] == LUCENE_WILDCARDTERMENUM_WILDCARD_STRING) | |
{ | |
// Look at the character beyond the '*'. | |
++p; | |
// Examine the str, starting at the last character. | |
for (int32_t i = strLen; i >= s; --i) | |
{ | |
if (wildcardEquals(pattern, patternLen, p, str, strLen, i)) | |
return true; | |
} | |
break; | |
} | |
if (pattern[p] != str[s]) | |
break; | |
} | |
return false; | |
} | |
} | |
CL_NS_END |