/*------------------------------------------------------------------------------ | |
* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team | |
* | |
* Distributable under the terms of either the Apache License (Version 2.0) or | |
* the GNU Lesser General Public License, as specified in the COPYING file. | |
------------------------------------------------------------------------------*/ | |
#include "CLucene/StdHeader.h" | |
#include "StandardFilter.h" | |
#include "../AnalysisHeader.h" | |
#include "../Analyzers.h" | |
#include "StandardTokenizerConstants.h" | |
#include "CLucene/util/StringBuffer.h" | |
CL_NS_USE(analysis) | |
CL_NS_USE(util) | |
CL_NS_DEF2(analysis,standard) | |
StandardFilter::StandardFilter(TokenStream* in, bool deleteTokenStream): | |
TokenFilter(in, deleteTokenStream) | |
{ | |
} | |
StandardFilter::~StandardFilter(){ | |
} | |
bool StandardFilter::next(Token* t) { | |
if (!input->next(t)) | |
return false; | |
TCHAR* text = t->_termText; | |
const int32_t textLength = t->termTextLength(); | |
const TCHAR* type = t->type(); | |
if ( type == tokenImage[APOSTROPHE] && //we can compare the type directy since the type should always come from the tokenImage | |
( textLength >= 2 && _tcsicmp(text+textLength-2, _T("'s"))==0 ) ) | |
{ | |
// remove 's | |
text[textLength-2]=0; | |
t->resetTermTextLen(); | |
return true; | |
} else if ( type == tokenImage[ACRONYM] ) { // remove dots | |
int32_t j = 0; | |
for ( int32_t i=0;i<textLength;i++ ){ | |
if ( text[i] != '.' ) | |
text[j++]=text[i]; | |
} | |
text[j]=0; | |
return true; | |
} else { | |
return true; | |
} | |
} | |
CL_NS_END2 |