// blob: 4d4944ea01976adb566a14a6617c7e0206e8df32 [file] [log] [blame]
/*------------------------------------------------------------------------------
* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
*
* Distributable under the terms of either the Apache License (Version 2.0) or
* the GNU Lesser General Public License, as specified in the COPYING file.
------------------------------------------------------------------------------*/
#ifndef _lucene_index_MultiReader
#define _lucene_index_MultiReader
#if defined(_LUCENE_PRAGMA_ONCE)
# pragma once
#endif
#include "SegmentHeader.h"
#include "IndexReader.h"
#include "CLucene/document/Document.h"
#include "Terms.h"
#include "SegmentMergeQueue.h"
CL_NS_DEF(index)
/** A TermDocs declared over an array of sub-readers. Going by the members, it
* keeps one TermDocs slot per sub-reader (readerTermDocs) next to the
* sub-reader array and a table of starts.
*
* The sentence that originally stood here -- "An IndexReader which reads
* multiple indexes, appending their content." -- describes MultiReader,
* declared further down in this header, not this class.
*
* NOTE(review): the method bodies are not in this header; every remark below
* that goes beyond the declarations is marked as an assumption to confirm.
*/
class MultiTermDocs:public virtual TermDocs {
private:
protected:
// One TermDocs per sub-reader; presumably indexed like subReaders -- TODO confirm.
TermDocs** readerTermDocs;
// Sub-readers handed to the constructor. NOTE(review): ownership is not visible here.
IndexReader** subReaders;
// Number of entries in subReaders. The constructors take no length argument,
// so this is presumably derived from the array itself -- confirm in the .cpp.
int32_t subReadersLength;
// Presumably the first document number of each sub-reader (MultiReader::starts
// in this header is commented that way) -- TODO confirm.
const int32_t* starts;
// Presumably the term most recently passed to seek() -- TODO confirm.
Term* term;
// NOTE(review): looks like the document-number offset of the current sub-reader -- confirm.
int32_t base;
// Presumably the index of the sub-reader currently being read (see `current`).
int32_t pointer;
TermDocs* current; // == readerTermDocs[pointer] (was written "segTermDocs", which is not a member of this class)
public:
// Default constructor. NOTE(review): which members it initialises is not visible here.
MultiTermDocs();
/** @param subReaders array of sub-readers to enumerate over
* @param s presumably the table kept in `starts` -- TODO confirm
*/
MultiTermDocs(IndexReader** subReaders, const int32_t* s);
virtual ~MultiTermDocs();
// Current document number. NOTE(review): presumably already offset by `base` -- confirm.
int32_t doc() const;
// Frequency value for the current document.
int32_t freq() const;
// Two ways to position the enumerator: from a TermEnum or from a Term.
void seek(TermEnum* termEnum);
void seek(Term* tterm);
// Presumably advances to the next document and reports whether one exists -- confirm.
bool next();
/** Optimized implementation. */
int32_t read(int32_t* docs, int32_t* freqs, int32_t length);
/** As yet unoptimized implementation. */
bool skipTo(const int32_t target);
void close();
// Conversion hook to the TermPositions interface; MultiTermPositions redeclares it.
virtual TermPositions* __asTermPositions();
protected:
// Produces a TermDocs for one sub-reader. Virtual so that subclasses can
// substitute their own kind; MultiTermPositions redeclares it.
virtual TermDocs* termDocs(const IndexReader* reader) const;
private:
// Index-based variant. NOTE(review): presumably looks up or lazily creates
// readerTermDocs[i] -- confirm in the .cpp.
TermDocs* termDocs(const int32_t i) const;
};
// MultiTermEnum represents the enumeration of all terms of all readers.
// It is declared with a SegmentMergeQueue member; NOTE(review): presumably
// the per-reader enumerations are merged through that queue -- confirm in the .cpp.
class MultiTermEnum:public TermEnum {
private:
// Queue holding the per-reader enumerations (see the comment on close()).
SegmentMergeQueue* queue;
// Current term, as handed out by term().
Term* _term;
// Document frequency reported by docFreq().
int32_t _docFreq;
public:
// Constructor
// Opens all enumerations of all readers.
// NOTE(review): `t` is presumably the term to start enumerating from -- confirm.
MultiTermEnum(IndexReader** subReaders, const int32_t* starts, const Term* t);
// Destructor
~MultiTermEnum();
// Move the current term to the next in the set of enumerations
bool next();
// Returns a pointer to the current term of the set of enumerations.
// NOTE(review): the meaning of the `pointer` flag of the second overload
// (e.g. whether it affects reference counting) is not visible here -- confirm.
Term* term();
Term* term(bool pointer);
// Returns the document frequency of the current term in the set
int32_t docFreq() const;
// Closes the set of enumerations in the queue
void close();
// Run-time name of this object; forwards to getClassName().
const char* getObjectName(){ return MultiTermEnum::getClassName(); }
// Class name as a string literal: "MultiTermEnum".
static const char* getClassName(){ return "MultiTermEnum"; }
};
// MultiTermDocs extended with the TermPositions interface: it adds
// nextPosition() and redeclares the per-reader termDocs() factory.
// NOTE(review): the factory presumably returns each sub-reader's
// termPositions() instead of its termDocs() -- confirm in the .cpp.
class MultiTermPositions:public MultiTermDocs,public TermPositions {
public:
/** @param subReaders array of sub-readers to enumerate over
* @param s passed under the same name as in MultiTermDocs' constructor;
* presumably forwarded to it -- TODO confirm
*/
MultiTermPositions(IndexReader** subReaders, const int32_t* s);
// Nothing to release at this level; the destructor body is empty.
~MultiTermPositions() {};
// Next position value for the current document.
int32_t nextPosition();
// Conversion hooks between the two interfaces this class inherits.
virtual TermDocs* __asTermDocs();
virtual TermPositions* __asTermPositions();
protected:
// Same signature as the virtual MultiTermDocs::termDocs(const IndexReader*) const.
TermDocs* termDocs(const IndexReader* reader) const;
};
/** An IndexReader which reads multiple indexes, appending their content.
*
* NOTE(review): only declarations are visible in this header; remarks below
* that go beyond them are marked as assumptions to confirm in the .cpp.
*/
class MultiReader:public IndexReader{
private:
// Value returned by hasDeletions().
bool _hasDeletions;
// The aggregated sub-readers and their count. The constructors take no
// length argument, so the count is presumably derived from the array -- confirm.
IndexReader** subReaders;
int32_t subReadersLength;
int32_t* starts; // 1st docno for each segment
// Field name -> norms byte array. The Deletor arguments (tcArray for keys,
// Array<uint8_t> for values) suggest the table frees both -- TODO confirm.
CL_NS(util)::CLHashtable<const TCHAR*,uint8_t*,
CL_NS(util)::Compare::TChar,
CL_NS(util)::Equals::TChar,
CL_NS(util)::Deletor::tcArray,
CL_NS(util)::Deletor::Array<uint8_t> > normsCache;
// Presumably cached totals backing maxDoc() and numDocs() -- TODO confirm.
int32_t _maxDoc;
int32_t _numDocs;
// Shared set-up helper; both constructors receive the same subReaders argument.
void initialize(IndexReader** subReaders);
// NOTE(review): presumably maps document number n to the index of the
// sub-reader that contains it, using `starts` -- confirm.
int32_t readerIndex(const int32_t n) const;
bool hasNorms(const TCHAR* field);
// NOTE(review): `ones` / fakeNorms() look like a lazily built default norms
// array for fields without norms -- confirm.
uint8_t* ones;
uint8_t* fakeNorms();
protected:
// The do* methods below are presumably the hooks invoked by IndexReader's
// public setNorm/undeleteAll/commit/close/delete operations -- TODO confirm.
void doSetNorm(int32_t n, const TCHAR* field, uint8_t value);
void doUndeleteAll();
void doCommit();
// synchronized
void doClose();
// synchronized
void doDelete(const int32_t n);
public:
/** Construct reading the named set of readers. */
MultiReader(CL_NS(store)::Directory* directory, SegmentInfos* sis, IndexReader** subReaders);
/**
* <p>Construct a MultiReader aggregating the named set of (sub)readers.
* Directory locking for delete, undeleteAll, and setNorm operations is
* left to the subreaders. </p>
* <p>Note that all subreaders are closed if this MultiReader is closed.</p>
* @param subReaders set of (sub)readers
* @throws IOException
*/
MultiReader(IndexReader** subReaders);
~MultiReader();
/** Return an array of term frequency vectors for the specified document.
* The array contains a vector for each vectorized field in the document.
* Each vector contains term numbers and frequencies for all terms
* in a given vectorized field.
* If no such fields existed, the method returns null.
* NOTE(review): that last sentence does not match this signature -- the
* vectors are delivered through `result` and the return type is bool;
* presumably false means "none found" -- confirm.
*/
bool getTermFreqVectors(int32_t n, Array<TermFreqVector*>& result);
// Term frequency vector of a single field of document n.
TermFreqVector* getTermFreqVector(int32_t n, const TCHAR* field);
// synchronized
int32_t numDocs();
int32_t maxDoc() const;
// Fills `doc` with document n. NOTE(review): meaning of the bool result is not visible here.
bool document(int32_t n, CL_NS(document)::Document* doc);
bool isDeleted(const int32_t n);
// Returns the stored _hasDeletions flag.
bool hasDeletions() const{ return _hasDeletions; }
// synchronized
uint8_t* norms(const TCHAR* field);
// Variant that writes the norms of `field` into the caller-supplied `result`.
void norms(const TCHAR* field, uint8_t* result);
// Term enumerations: over all terms, or for the given term.
// NOTE(review): the second overload presumably starts at `term` -- confirm.
TermEnum* terms() const;
TermEnum* terms(const Term* term) const;
// Returns the document frequency of term t.
// NOTE(review): presumably summed over all sub-readers -- confirm. If docFreq
// is virtual in IndexReader, the default argument here is bound statically
// to the static type of the caller's pointer or reference.
int32_t docFreq(const Term* t=NULL) const;
// Factories for unpositioned enumerators over all sub-readers.
// NOTE(review): presumably MultiTermDocs / MultiTermPositions instances -- confirm.
TermDocs* termDocs() const;
TermPositions* termPositions() const;
/**
* @see IndexReader#getFieldNames(IndexReader.FieldOption fldOption)
*/
void getFieldNames(FieldOption fldOption, CL_NS(util)::StringArrayWithDeletor& retarray);
};
CL_NS_END
#endif