Windows-4.7.4/src/3rdparty/clucene/src/CLucene/analysis/AnalysisHeader.cpp - platform/external/qt - Git at Google

 /*------------------------------------------------------------------------------
 * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
 *
 * Distributable under the terms of either the Apache License (Version 2.0) or
 * the GNU Lesser General Public License, as specified in the COPYING file.
 ------------------------------------------------------------------------------*/
 #include "CLucene/StdHeader.h"
 #include "AnalysisHeader.h"
 #include "CLucene/util/StringBuffer.h"

 CL_NS_USE(util)
 CL_NS_DEF(analysis)

 // Type label assigned to tokens created by the default constructor.
 // toString() leaves the type out of its output when it equals "word".
 const TCHAR* Token::defaultType=_T("word");

 // Builds an empty token: both offsets 0, type defaultType, position
 // increment 1, and term text of length 0.
 Token::Token()
 	: _startOffset(0)
 	, _endOffset(0)
 	, _type(defaultType)
 	, positionIncrement(1)
 {
 #ifdef LUCENE_TOKEN_WORD_LENGTH
 	// The buffer is indexed directly here, so it already exists: record its
 	// capacity and turn it into an empty, null-terminated string.
 	bufferTextLen = LUCENE_TOKEN_WORD_LENGTH + 1;
 	_termText[0] = 0;
 #else
 	// No buffer yet; capacity stays 0 until growBuffer() allocates one.
 	bufferTextLen = 0;
 	_termText = NULL;
 #endif
 	_termTextLen = 0;
 }

 // Releases the term buffer. Only needed when no fixed word length is
 // configured, i.e. when the buffer came from malloc/realloc in growBuffer().
 Token::~Token()
 {
 #if !defined(LUCENE_TOKEN_WORD_LENGTH)
 	free(_termText); // free(NULL) is a no-op, so a never-allocated buffer is fine
 #endif
 }

 // Builds a token holding a copy of `text`, with the given start/end offsets
 // and type, and a position increment of 1.
 Token::Token(const TCHAR* text, const int32_t start, const int32_t end, const TCHAR* typ)
 	: _startOffset(start)
 	, _endOffset(end)
 	, _type(typ)
 	, positionIncrement(1)
 {
 	// Put the text storage into its empty state first; setText() relies on
 	// bufferTextLen / _termText being initialised.
 #ifdef LUCENE_TOKEN_WORD_LENGTH
 	bufferTextLen = LUCENE_TOKEN_WORD_LENGTH + 1;
 	_termText[0] = 0;
 #else
 	bufferTextLen = 0;
 	_termText = NULL;
 #endif
 	_termTextLen = 0;

 	setText(text);
 }

 // Re-initialises this token in place: stores the offsets and type, resets
 // the position increment to 1, then copies `text` in through setText().
 void Token::set(const TCHAR* text, const int32_t start, const int32_t end, const TCHAR* typ)
 {
 	positionIncrement = 1;
 	_type = typ;
 	_endOffset = end;
 	_startOffset = start;

 	setText(text);
 }

 // Copies `text` into the token's term buffer and records its length.
 // With LUCENE_TOKEN_WORD_LENGTH defined, text longer than that limit is
 // truncated to the limit; otherwise the buffer is grown to fit.
 void Token::setText(const TCHAR* text)
 {
 	_termTextLen = _tcslen(text);

 #ifdef LUCENE_TOKEN_WORD_LENGTH
 	// Over-long word: keep only what fits. The end offset is left alone on
 	// purpose, since the actual word still occupies that space.
 	if ( _termTextLen > LUCENE_TOKEN_WORD_LENGTH )
 		_termTextLen = LUCENE_TOKEN_WORD_LENGTH;
 #else
 	// Room for the characters plus the terminator.
 	growBuffer(_termTextLen+1);
 #endif

 	_tcsncpy(_termText, text, _termTextLen+1);
 	// A truncated copy has no terminator of its own, so always write one.
 	_termText[_termTextLen] = 0;
 }

 void Token::growBuffer(size_t size){
 	if(bufferTextLen>=size)
 		return;
 #ifndef LUCENE_TOKEN_WORD_LENGTH
 	if ( _termText == NULL )
 		_termText = (TCHAR*)malloc( size * sizeof(TCHAR) );
 	else
 		_termText = (TCHAR*)realloc( _termText, size * sizeof(TCHAR) );
 	bufferTextLen = size;
 #else
 	_CLTHROWA(CL_ERR_TokenMgr,"Couldn't grow Token buffer");
 #endif
 }

 // Stores a new position increment.
 // Throws CL_ERR_IllegalArgument when `posIncr` is negative.
 void Token::setPositionIncrement(int32_t posIncr)
 {
 	if (posIncr >= 0) {
 		positionIncrement = posIncr;
 		return;
 	}
 	_CLTHROWA(CL_ERR_IllegalArgument,"positionIncrement must be >= 0");
 }

 // Returns the stored position increment.
 int32_t Token::getPositionIncrement() const
 {
 	return positionIncrement;
 }

 // Gives read-only access to the token's term text buffer.
 const TCHAR* Token::termText() const
 {
 	return static_cast<const TCHAR*>(_termText);
 }
 // Returns the length of the term text. A cached length of -1 (as set by
 // resetTermTextLen()) means "unknown": it is then recomputed from the
 // buffer with _tcslen and cached again.
 size_t Token::termTextLength()
 {
 	if ( _termTextLen != -1 )
 		return _termTextLen;

 	_termTextLen = _tcslen(_termText);
 	return _termTextLen;
 }
 // Marks the cached term length as unknown (-1) so that the next
 // termTextLength() call re-measures the buffer.
 void Token::resetTermTextLen()
 {
 	_termTextLen = -1;
 }
 // Orders tokens by start offset: false when t1 starts after t2, true
 // otherwise (including when both start at the same offset).
 // NOTE(review): returning true for equal offsets means this is not a strict
 // weak ordering, which standard ordered containers/algorithms expect.
 // Callers may depend on equal-offset tokens never comparing equivalent, so
 // confirm the usages before changing it.
 bool Token::OrderCompare::operator()( Token* t1, Token* t2 ) const
 {
 	// assumes startOffset() is a plain accessor with no side effects
 	return !( t1->startOffset() > t2->startOffset() );
 }
 // Renders the token as "(text,start,end)", appending ",type=<type>" when the
 // type is not "word" and ",posIncr=<n>" when the position increment is not 1.
 // Returns whatever StringBuffer::toString() yields; presumably the caller
 // owns that buffer - verify against StringBuffer.
 TCHAR* Token::toString() const
 {
 	StringBuffer out;

 	out.append(_T("("));
 	out.append( _termText );
 	out.append(_T(","));
 	out.appendInt( _startOffset );
 	out.append(_T(","));
 	out.appendInt( _endOffset );

 	const bool showType = ( _tcscmp( _type, _T("word") ) != 0 );
 	if ( showType ) {
 		out.append(_T(",type="));
 		out.append(_type);
 	}

 	const bool showPosIncr = ( positionIncrement != 1 );
 	if ( showPosIncr ) {
 		out.append(_T(",posIncr="));
 		out.appendInt(positionIncrement);
 	}

 	out.append(_T(")"));
 	return out.toString();
 }


 // Deprecated convenience overload: allocates a Token, asks next(Token*) to
 // fill it, and returns it. When next(Token*) reports failure the token is
 // handed to _CLDELETE and the resulting pointer value is returned
 // (presumably NULL - confirm against the _CLDELETE macro).
 Token* TokenStream::next()
 {
 	Token* fresh = _CLNEW Token; //deprecated
 	if ( next(fresh) )
 		return fresh;

 	_CLDELETE(fresh);
 	return fresh;
 }


 // Wraps the stream `in`. `deleteTS` records whether close() should also
 // delete that stream.
 TokenFilter::TokenFilter(TokenStream* in, bool deleteTS)
 	: input(in)
 	, deleteTokenStream(deleteTS)
 {}
 // Destruction closes the filter, which in turn closes (and optionally
 // deletes) the wrapped input stream.
 TokenFilter::~TokenFilter()
 {
 	close();
 }

 // Closes the wrapped input stream, deletes it when deleteTokenStream is set,
 // and forgets the pointer. Calling it again afterwards does nothing.
 void TokenFilter::close()
 {
 	if ( input == NULL )
 		return; // nothing attached, or already closed

 	input->close();
 	if ( deleteTokenStream )
 		_CLDELETE( input );
 	input = NULL;
 }


 // Creates a tokenizer with no input reader attached.
 Tokenizer::Tokenizer()
 	: input(NULL)
 {
 }

 // Creates a tokenizer that reads from `_input`. Only the pointer is stored.
 Tokenizer::Tokenizer(CL_NS(util)::Reader* _input)
 {
 	input = _input;
 }

 // Detaches the input reader by clearing the pointer. The reader itself is
 // not deleted here (the original author left that as an open question).
 void Tokenizer::close()
 {
 	// ? delete input;
 	input = NULL;
 }

 // Destruction simply closes the tokenizer, dropping its reader pointer.
 Tokenizer::~Tokenizer()
 {
 	close();
 }


 // Base implementation: reports a position increment gap of 0 for every
 // field; `fieldName` is not consulted.
 int32_t Analyzer::getPositionIncrementGap(const TCHAR* fieldName) {
 	return 0;
 }

 CL_NS_END
	/*------------------------------------------------------------------------------
	* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
	*
	* Distributable under the terms of either the Apache License (Version 2.0) or
	* the GNU Lesser General Public License, as specified in the COPYING file.
	------------------------------------------------------------------------------*/
	#include "CLucene/StdHeader.h"
	#include "AnalysisHeader.h"
	#include "CLucene/util/StringBuffer.h"

	CL_NS_USE(util)
	CL_NS_DEF(analysis)

	// Type label assigned to tokens created by the default constructor.
	// toString() leaves the type out of its output when it equals "word".
	const TCHAR* Token::defaultType=_T("word");

	// Builds an empty token: both offsets 0, type defaultType, position
	// increment 1, and term text of length 0.
	Token::Token()
		: _startOffset(0)
		, _endOffset(0)
		, _type(defaultType)
		, positionIncrement(1)
	{
	#ifdef LUCENE_TOKEN_WORD_LENGTH
		// The buffer is indexed directly here, so it already exists: record its
		// capacity and turn it into an empty, null-terminated string.
		bufferTextLen = LUCENE_TOKEN_WORD_LENGTH + 1;
		_termText[0] = 0;
	#else
		// No buffer yet; capacity stays 0 until growBuffer() allocates one.
		bufferTextLen = 0;
		_termText = NULL;
	#endif
		_termTextLen = 0;
	}

	// Releases the term buffer. Only needed when no fixed word length is
	// configured, i.e. when the buffer came from malloc/realloc in growBuffer().
	Token::~Token()
	{
	#if !defined(LUCENE_TOKEN_WORD_LENGTH)
		free(_termText); // free(NULL) is a no-op, so a never-allocated buffer is fine
	#endif
	}

	// Builds a token holding a copy of `text`, with the given start/end offsets
	// and type, and a position increment of 1.
	Token::Token(const TCHAR* text, const int32_t start, const int32_t end, const TCHAR* typ)
		: _startOffset(start)
		, _endOffset(end)
		, _type(typ)
		, positionIncrement(1)
	{
		// Put the text storage into its empty state first; setText() relies on
		// bufferTextLen / _termText being initialised.
	#ifdef LUCENE_TOKEN_WORD_LENGTH
		bufferTextLen = LUCENE_TOKEN_WORD_LENGTH + 1;
		_termText[0] = 0;
	#else
		bufferTextLen = 0;
		_termText = NULL;
	#endif
		_termTextLen = 0;

		setText(text);
	}

	// Re-initialises this token in place: stores the offsets and type, resets
	// the position increment to 1, then copies `text` in through setText().
	void Token::set(const TCHAR* text, const int32_t start, const int32_t end, const TCHAR* typ)
	{
		positionIncrement = 1;
		_type = typ;
		_endOffset = end;
		_startOffset = start;

		setText(text);
	}

	// Copies `text` into the token's term buffer and records its length.
	// With LUCENE_TOKEN_WORD_LENGTH defined, text longer than that limit is
	// truncated to the limit; otherwise the buffer is grown to fit.
	void Token::setText(const TCHAR* text)
	{
		_termTextLen = _tcslen(text);

	#ifdef LUCENE_TOKEN_WORD_LENGTH
		// Over-long word: keep only what fits. The end offset is left alone on
		// purpose, since the actual word still occupies that space.
		if ( _termTextLen > LUCENE_TOKEN_WORD_LENGTH )
			_termTextLen = LUCENE_TOKEN_WORD_LENGTH;
	#else
		// Room for the characters plus the terminator.
		growBuffer(_termTextLen+1);
	#endif

		_tcsncpy(_termText, text, _termTextLen+1);
		// A truncated copy has no terminator of its own, so always write one.
		_termText[_termTextLen] = 0;
	}

	void Token::growBuffer(size_t size){
	if(bufferTextLen>=size)
	return;
	#ifndef LUCENE_TOKEN_WORD_LENGTH
	if ( _termText == NULL )
	_termText = (TCHAR)malloc( size sizeof(TCHAR) );
	else
	_termText = (TCHAR)realloc( _termText, size sizeof(TCHAR) );
	bufferTextLen = size;
	#else
	_CLTHROWA(CL_ERR_TokenMgr,"Couldn't grow Token buffer");
	#endif
	}

	// Stores a new position increment.
	// Throws CL_ERR_IllegalArgument when `posIncr` is negative.
	void Token::setPositionIncrement(int32_t posIncr)
	{
		if (posIncr >= 0) {
			positionIncrement = posIncr;
			return;
		}
		_CLTHROWA(CL_ERR_IllegalArgument,"positionIncrement must be >= 0");
	}

	// Returns the stored position increment.
	int32_t Token::getPositionIncrement() const
	{
		return positionIncrement;
	}

	// Gives read-only access to the token's term text buffer.
	const TCHAR* Token::termText() const
	{
		return static_cast<const TCHAR*>(_termText);
	}
	// Returns the length of the term text. A cached length of -1 (as set by
	// resetTermTextLen()) means "unknown": it is then recomputed from the
	// buffer with _tcslen and cached again.
	size_t Token::termTextLength()
	{
		if ( _termTextLen != -1 )
			return _termTextLen;

		_termTextLen = _tcslen(_termText);
		return _termTextLen;
	}
	// Marks the cached term length as unknown (-1) so that the next
	// termTextLength() call re-measures the buffer.
	void Token::resetTermTextLen()
	{
		_termTextLen = -1;
	}
	// Orders tokens by start offset: false when t1 starts after t2, true
	// otherwise (including when both start at the same offset).
	// NOTE(review): returning true for equal offsets means this is not a strict
	// weak ordering, which standard ordered containers/algorithms expect.
	// Callers may depend on equal-offset tokens never comparing equivalent, so
	// confirm the usages before changing it.
	bool Token::OrderCompare::operator()( Token* t1, Token* t2 ) const
	{
		// assumes startOffset() is a plain accessor with no side effects
		return !( t1->startOffset() > t2->startOffset() );
	}
	// Renders the token as "(text,start,end)", appending ",type=<type>" when the
	// type is not "word" and ",posIncr=<n>" when the position increment is not 1.
	// Returns whatever StringBuffer::toString() yields; presumably the caller
	// owns that buffer - verify against StringBuffer.
	TCHAR* Token::toString() const
	{
		StringBuffer out;

		out.append(_T("("));
		out.append( _termText );
		out.append(_T(","));
		out.appendInt( _startOffset );
		out.append(_T(","));
		out.appendInt( _endOffset );

		const bool showType = ( _tcscmp( _type, _T("word") ) != 0 );
		if ( showType ) {
			out.append(_T(",type="));
			out.append(_type);
		}

		const bool showPosIncr = ( positionIncrement != 1 );
		if ( showPosIncr ) {
			out.append(_T(",posIncr="));
			out.appendInt(positionIncrement);
		}

		out.append(_T(")"));
		return out.toString();
	}


	// Deprecated convenience overload: allocates a Token, asks next(Token*) to
	// fill it, and returns it. When next(Token*) reports failure the token is
	// handed to _CLDELETE and the resulting pointer value is returned
	// (presumably NULL - confirm against the _CLDELETE macro).
	Token* TokenStream::next()
	{
		Token* fresh = _CLNEW Token; //deprecated
		if ( next(fresh) )
			return fresh;

		_CLDELETE(fresh);
		return fresh;
	}


	// Wraps the stream `in`. `deleteTS` records whether close() should also
	// delete that stream.
	TokenFilter::TokenFilter(TokenStream* in, bool deleteTS)
		: input(in)
		, deleteTokenStream(deleteTS)
	{}
	// Destruction closes the filter, which in turn closes (and optionally
	// deletes) the wrapped input stream.
	TokenFilter::~TokenFilter()
	{
		close();
	}

	// Closes the wrapped input stream, deletes it when deleteTokenStream is set,
	// and forgets the pointer. Calling it again afterwards does nothing.
	void TokenFilter::close()
	{
		if ( input == NULL )
			return; // nothing attached, or already closed

		input->close();
		if ( deleteTokenStream )
			_CLDELETE( input );
		input = NULL;
	}



	// Creates a tokenizer with no input reader attached.
	Tokenizer::Tokenizer()
		: input(NULL)
	{
	}

	// Creates a tokenizer that reads from `_input`. Only the pointer is stored.
	Tokenizer::Tokenizer(CL_NS(util)::Reader* _input)
	{
		input = _input;
	}

	// Detaches the input reader by clearing the pointer. The reader itself is
	// not deleted here (the original author left that as an open question).
	void Tokenizer::close()
	{
		// ? delete input;
		input = NULL;
	}

	// Destruction simply closes the tokenizer, dropping its reader pointer.
	Tokenizer::~Tokenizer()
	{
		close();
	}


	// Base implementation: reports a position increment gap of 0 for every
	// field; `fieldName` is not consulted.
	int32_t Analyzer::getPositionIncrementGap(const TCHAR* fieldName) {
		return 0;
	}

	CL_NS_END