blob: 7b3f8383cc7f07d87392b7bb27baff029156b4e4 [file] [log] [blame]
/*------------------------------------------------------------------------------
* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
*
* Distributable under the terms of either the Apache License (Version 2.0) or
* the GNU Lesser General Public License, as specified in the COPYING file.
------------------------------------------------------------------------------*/
#include "CLucene/StdHeader.h"
#include "StandardFilter.h"
#include "../AnalysisHeader.h"
#include "../Analyzers.h"
#include "StandardTokenizerConstants.h"
#include "CLucene/util/StringBuffer.h"
CL_NS_USE(analysis)
CL_NS_USE(util)
CL_NS_DEF2(analysis,standard)
/**
 * Builds a StandardFilter on top of the token stream `in`.
 * Both arguments are handed unchanged to the TokenFilter base constructor;
 * this class adds no construction logic of its own.
 *
 * @param in                 upstream token stream passed on to TokenFilter
 * @param deleteTokenStream  forwarded to TokenFilter; presumably controls whether
 *                           the base class deletes `in` -- confirm against TokenFilter
 */
StandardFilter::StandardFilter(TokenStream* in, bool deleteTokenStream)
    : TokenFilter(in, deleteTokenStream)
{}
/** Destructor; the body is empty, so no cleanup is performed at this level. */
StandardFilter::~StandardFilter()
{
}
/**
 * Pulls the next token from the wrapped input stream into `t` and normalizes
 * its term text in place:
 *
 *  - APOSTROPHE tokens that end in "'s" (compared case-insensitively) lose
 *    that two-character suffix;
 *  - ACRONYM tokens have every '.' character removed;
 *  - every other token is passed through unchanged.
 *
 * @param t token object filled by the input stream and then edited in place
 * @return false when the input stream yields no further token, true otherwise
 */
bool StandardFilter::next(Token* t) {
    if (!input->next(t))
        return false;

    TCHAR* text = t->_termText;
    const int32_t textLength = t->termTextLength();
    const TCHAR* type = t->type();

    // The type can be compared by pointer directly, since it should always
    // come from the tokenImage table.
    if ( type == tokenImage[APOSTROPHE] &&
         ( textLength >= 2 && _tcsicmp(text+textLength-2, _T("'s"))==0 ) )
    {
        // remove 's
        text[textLength-2]=0;
        t->resetTermTextLen();
        return true;
    } else if ( type == tokenImage[ACRONYM] ) {
        // remove dots by compacting the remaining characters to the front
        int32_t j = 0;
        for ( int32_t i=0;i<textLength;i++ ){
            if ( text[i] != '.' )
                text[j++]=text[i];
        }
        text[j]=0;
        // The text was shortened in place, exactly as in the apostrophe branch
        // above, so the token's length has to be reset here as well; otherwise
        // later termTextLength() calls may still report the pre-filter length.
        // NOTE(review): assumes resetTermTextLen() invalidates a cached length
        // -- confirm against Token.
        t->resetTermTextLen();
        return true;
    } else {
        return true;
    }
}
CL_NS_END2