/* | |
* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team | |
* | |
* Distributable under the terms of either the Apache License (Version 2.0) or | |
* the GNU Lesser General Public License, as specified in the COPYING file. | |
* | |
* Changes are Copyright(C) 2007, 2008 by Nokia Corporation and/or its subsidiary(-ies), all rights reserved. | |
*/ | |
# include <QtCore/QFile> | |
# include <QtCore/QStringList> | |
#include "CLucene/StdHeader.h" | |
#include "IndexReader.h" | |
#include "IndexWriter.h" | |
#include "CLucene/store/Directory.h" | |
#include "CLucene/store/FSDirectory.h" | |
#include "CLucene/store/Lock.h" | |
#include "CLucene/document/Document.h" | |
#include "CLucene/search/Similarity.h" | |
#include "SegmentInfos.h" | |
#include "MultiReader.h" | |
#include "Terms.h" | |
CL_NS_USE(util) | |
CL_NS_USE(store) | |
CL_NS_DEF(index) | |
IndexReader::IndexReader(Directory* dir) | |
: stale(false) | |
, hasChanges(false) | |
, closeDirectory(false) | |
, directoryOwner(false) | |
, segmentInfos(NULL) | |
, directory(_CL_POINTER(dir)) | |
, writeLock(NULL) | |
{ | |
} | |
IndexReader::IndexReader(Directory* dir, SegmentInfos* infos, bool close) | |
: stale(false) | |
, hasChanges(false) | |
, closeDirectory(close) | |
, directoryOwner(true) | |
, segmentInfos(infos) | |
, directory(_CL_POINTER(dir)) | |
, writeLock(NULL) | |
{ | |
} | |
IndexReader::~IndexReader() | |
{ | |
if (writeLock != NULL) { | |
writeLock->release(); | |
_CLDELETE(writeLock); | |
} | |
_CLDELETE(segmentInfos); | |
_CLDECDELETE(directory); | |
} | |
IndexReader* IndexReader::open(const QString& path) | |
{ | |
//Func - Static method. | |
// Returns an IndexReader reading the index in an FSDirectory in the named path. | |
//Pre - path != NULL and contains the path of the index for which an IndexReader must be | |
// instantiated | |
// closeDir indicates if the directory needs to be closed | |
//Post - An IndexReader has been returned that reads tnhe index located at path | |
CND_PRECONDITION(!path.isEmpty(), "path is NULL"); | |
Directory* dir = FSDirectory::getDirectory(path, false); | |
IndexReader* reader = open(dir, true); | |
//because fsdirectory will now have a refcount of 1 more than | |
//if the reader had been opened with a directory object, | |
//we need to do a refdec | |
_CLDECDELETE(dir); | |
return reader; | |
} | |
IndexReader* IndexReader::open(Directory* directory, bool closeDirectory) | |
{ | |
//Func - Static method. | |
// Returns an IndexReader reading the index in an FSDirectory in the named path. | |
//Pre - directory represents a directory | |
// closeDir indicates if the directory needs to be closed | |
//Post - An IndexReader has been returned that reads the index located at directory | |
// in- & inter-process sync | |
SCOPED_LOCK_MUTEX(directory->THIS_LOCK) | |
//Instantiate an IndexReader::LockWith which can produce an IndexReader | |
LuceneLock* lock = directory->makeLock(QLatin1String("commit.lock")); | |
IndexReader::LockWith with(lock, directory); | |
IndexReader* ret = NULL; | |
try { | |
//Create an IndexReader reading the index | |
ret = with.runAndReturn(); | |
} _CLFINALLY ( | |
_CLDELETE(lock); | |
); | |
CND_CONDITION(ret != NULL, "ret is NULL"); | |
ret->closeDirectory = closeDirectory; | |
return ret; | |
} | |
CL_NS(document)::Document* IndexReader::document(const int32_t n) | |
{ | |
CL_NS(document)::Document* ret = _CLNEW CL_NS(document)::Document; | |
if (!document(n, ret)) | |
_CLDELETE(ret); | |
return ret; | |
} | |
IndexReader* IndexReader::LockWith::doBody() | |
{ | |
//Func - Reads the segmentinfo file and depending on the number of segments found | |
// it returns a SegmentsReader or a SegmentReader | |
//Pre - directory != NULL | |
//Post - Depending on the number of Segments present in directory this method | |
// returns an empty SegmentsReader when there are no segments, a SegmentReader when | |
// directory contains 1 segment and a nonempty SegmentsReader when directory | |
// contains multiple segements | |
CND_PRECONDITION(directory != NULL, "directory is NULL"); | |
//Instantiate SegmentInfos | |
SegmentInfos* infos = _CLNEW SegmentInfos; | |
try { | |
//Have SegmentInfos read the segments file in directory | |
infos->read(directory); | |
} catch(...) { | |
//make sure infos is cleaned up | |
_CLDELETE(infos); | |
throw; | |
} | |
// If there is at least one segment (if infos.size() >= 1), the last | |
// SegmentReader object will close the directory when the SegmentReader | |
// object itself is closed (see SegmentReader::doClose). | |
// If there are no segments, there will be no "last SegmentReader object" | |
// to fulfill this responsibility, so we need to explicitly close the | |
// directory in the segmentsreader.close | |
//Count the number segments in the directory | |
const uint32_t nSegs = infos->size(); | |
if (nSegs == 1 ) { | |
// index is optimized | |
return _CLNEW SegmentReader(infos, infos->info(0)); | |
} else { | |
//Instantiate an array of pointers to SegmentReaders of size nSegs (The number of segments in the index) | |
IndexReader** readers = NULL; | |
if (nSegs > 0){ | |
uint32_t infosize = infos->size(); | |
readers = _CL_NEWARRAY(IndexReader*,infosize+1); | |
for (uint32_t i = 0; i < infosize; ++i) { | |
//Instantiate a SegementReader responsible for reading the i-th segment and store it in | |
//the readers array | |
readers[i] = _CLNEW SegmentReader(infos->info(i)); | |
} | |
readers[infosize] = NULL; | |
} | |
//return an instance of SegmentsReader which is a reader that manages all Segments | |
return _CLNEW MultiReader(directory, infos, readers); | |
}// end if | |
} | |
uint64_t IndexReader::lastModified(const QString& directory) | |
{ | |
//Func - Static method | |
// Returns the time the index in the named directory was last modified. | |
//Pre - directory != NULL and contains the path name of the directory to check | |
//Post - The last modified time of the index has been returned | |
CND_PRECONDITION(!directory.isEmpty(), "directory is NULL"); | |
return FSDirectory::fileModified(directory, QLatin1String("segments")); | |
} | |
int64_t IndexReader::getCurrentVersion(Directory* directory) | |
{ | |
// in- & inter-process sync | |
SCOPED_LOCK_MUTEX(directory->THIS_LOCK) | |
int64_t ret = 0; | |
bool locked = false; | |
LuceneLock* commitLock = directory->makeLock(IndexWriter::COMMIT_LOCK_NAME); | |
try { | |
locked = commitLock->obtain(IndexWriter::COMMIT_LOCK_TIMEOUT); | |
ret = SegmentInfos::readCurrentVersion(directory); | |
} _CLFINALLY ( | |
if (locked) | |
commitLock->release(); | |
_CLDELETE(commitLock); | |
) | |
return ret; | |
} | |
int64_t IndexReader::getCurrentVersion(const QString& directory) | |
{ | |
Directory* dir = FSDirectory::getDirectory(directory, false); | |
int64_t version = getCurrentVersion(dir); | |
dir->close(); | |
_CLDECDELETE(dir); | |
return version; | |
} | |
int64_t IndexReader::getVersion() | |
{ | |
return segmentInfos->getVersion(); | |
} | |
bool IndexReader::isCurrent() | |
{ | |
// in- & inter-process sync | |
SCOPED_LOCK_MUTEX(directory->THIS_LOCK) | |
bool ret = false; | |
bool locked = false; | |
LuceneLock* commitLock = directory->makeLock(IndexWriter::COMMIT_LOCK_NAME); | |
try { | |
locked = commitLock->obtain(IndexWriter::COMMIT_LOCK_TIMEOUT); | |
ret = SegmentInfos::readCurrentVersion(directory) | |
== segmentInfos->getVersion(); | |
} _CLFINALLY( | |
if (locked) | |
commitLock->release(); | |
_CLDELETE(commitLock); | |
) | |
return ret; | |
} | |
uint64_t IndexReader::lastModified(const Directory* directory) | |
{ | |
//Func - Static method | |
// Returns the time the index in this directory was last modified. | |
//Pre - directory contains a valid reference | |
//Post - The last modified time of the index has been returned | |
return directory->fileModified(QLatin1String("segments")); | |
} | |
bool IndexReader::indexExists(const QString& directory) | |
{ | |
//Func - Static method | |
// Checks if an index exists in the named directory | |
//Pre - directory != NULL | |
//Post - Returns true if an index exists at the specified directory-> | |
// If the directory does not exist or if there is no index in it. | |
// false is returned. | |
CND_PRECONDITION(!directory.isEmpty(), "directory is NULL"); | |
return QFile(directory + QLatin1String("/segments")).exists(); | |
} | |
void IndexReader::setNorm(int32_t doc, const TCHAR* field, uint8_t value) | |
{ | |
SCOPED_LOCK_MUTEX(THIS_LOCK) | |
if(directoryOwner) | |
aquireWriteLock(); | |
doSetNorm(doc, field, value); | |
hasChanges = true; | |
} | |
void IndexReader::aquireWriteLock() | |
{ | |
if (stale) { | |
_CLTHROWA(CL_ERR_IO, | |
"IndexReader out of date and no longer valid for delete, " | |
"undelete, or setNorm operations"); | |
} | |
if (writeLock == NULL) { | |
LuceneLock* writeLock = directory->makeLock(QLatin1String("write.lock")); | |
if (!writeLock->obtain(IndexWriter::WRITE_LOCK_TIMEOUT)) // obtain write lock | |
_CLTHROWA(CL_ERR_IO,"Index locked for write"); // + writeLock | |
this->writeLock = writeLock; | |
// we have to check whether index has changed since this reader was opened. | |
// if so, this reader is no longer valid for deletion | |
if (SegmentInfos::readCurrentVersion(directory) > segmentInfos->getVersion()) { | |
stale = true; | |
this->writeLock->release(); | |
_CLDELETE(this->writeLock); | |
_CLTHROWA(CL_ERR_IO,"IndexReader out of date and no longer valid " | |
"for delete, undelete, or setNorm operations"); | |
} | |
} | |
} | |
void IndexReader::setNorm(int32_t doc, const TCHAR* field, qreal value) | |
{ | |
setNorm(doc, field, CL_NS(search)::Similarity::encodeNorm(value)); | |
} | |
bool IndexReader::indexExists(const Directory* directory) | |
{ | |
//Func - Static method | |
// Checks if an index exists in the directory | |
//Pre - directory is a valid reference | |
//Post - Returns true if an index exists at the specified directory-> | |
// If the directory does not exist or if there is no index in it. | |
// false is returned. | |
return directory->fileExists(QLatin1String("segments")); | |
} | |
TermDocs* IndexReader::termDocs(Term* term) const | |
{ | |
//Func - Returns an enumeration of all the documents which contain | |
// term. For each document, the document number, the frequency of | |
// the term in that document is also provided, for use in search scoring. | |
// Thus, this method implements the mapping: | |
// | |
// Term => <docNum, freq>* | |
// The enumeration is ordered by document number. Each document number | |
// is greater than all that precede it in the enumeration. | |
//Pre - term != NULL | |
//Post - A reference to TermDocs containing an enumeration of all found documents | |
// has been returned | |
CND_PRECONDITION(term != NULL, "term is NULL"); | |
//Reference an instantiated TermDocs instance | |
TermDocs* _termDocs = termDocs(); | |
//Seek all documents containing term | |
_termDocs->seek(term); | |
//return the enumaration | |
return _termDocs; | |
} | |
TermPositions* IndexReader::termPositions(Term* term) const | |
{ | |
//Func - Returns an enumeration of all the documents which contain term. For each | |
// document, in addition to the document number and frequency of the term in | |
// that document, a list of all of the ordinal positions of the term in the document | |
// is available. Thus, this method implements the mapping: | |
// | |
// Term => <docNum, freq,<pos 1, pos 2, ...pos freq-1>>* | |
// | |
// This positional information faciliates phrase and proximity searching. | |
// The enumeration is ordered by document number. Each document number is greater than | |
// all that precede it in the enumeration. | |
//Pre - term != NULL | |
//Post - A reference to TermPositions containing an enumeration of all found documents | |
// has been returned | |
CND_PRECONDITION(term != NULL, "term is NULL"); | |
//Reference an instantiated termPositions instance | |
TermPositions* _termPositions = termPositions(); | |
//Seek all documents containing term | |
_termPositions->seek(term); | |
//return the enumeration | |
return _termPositions; | |
} | |
void IndexReader::deleteDocument(const int32_t docNum) | |
{ | |
//Func - Deletes the document numbered docNum. Once a document is deleted it will not appear | |
// in TermDocs or TermPostitions enumerations. Attempts to read its field with the document | |
// method will result in an error. The presence of this document may still be reflected in | |
// the docFreq statistic, though this will be corrected eventually as the index is further modified. | |
//Pre - docNum >= 0 | |
//Post - If successful the document identified by docNum has been deleted. If no writelock | |
// could be obtained an exception has been thrown stating that the index was locked or has no write access | |
SCOPED_LOCK_MUTEX(THIS_LOCK) | |
CND_PRECONDITION(docNum >= 0, "docNum is negative"); | |
if (directoryOwner) | |
aquireWriteLock(); | |
//Have the document identified by docNum deleted | |
doDelete(docNum); | |
hasChanges = true; | |
} | |
/** | |
* Commit changes resulting from delete, undeleteAll, or setNorm operations | |
* | |
* @throws IOException | |
*/ | |
void IndexReader::commit() | |
{ | |
SCOPED_LOCK_MUTEX(THIS_LOCK) | |
if(hasChanges){ | |
if(directoryOwner){ | |
{ | |
SCOPED_LOCK_MUTEX(directory->THIS_LOCK) // in- & inter-process sync | |
LuceneLock* commitLock = directory->makeLock(QLatin1String("commit.lock")); | |
IndexReader::CommitLockWith cl(commitLock,this); | |
cl.run(); | |
_CLDELETE(commitLock); | |
} | |
if (writeLock != NULL) { | |
writeLock->release(); // release write lock | |
_CLDELETE(writeLock); | |
} | |
}else | |
doCommit(); | |
} | |
hasChanges = false; | |
} | |
void IndexReader::undeleteAll() | |
{ | |
SCOPED_LOCK_MUTEX(THIS_LOCK) | |
if(directoryOwner) | |
aquireWriteLock(); | |
doUndeleteAll(); | |
hasChanges = true; | |
} | |
int32_t IndexReader::deleteDocuments(Term* term) | |
{ | |
//Func - Deletes all documents containing term. This is useful if one uses a | |
// document field to hold a unique ID string for the document. Then to delete such | |
// a document, one merely constructs a term with the appropriate field and the unique | |
// ID string as its text and passes it to this method. | |
//Pre - term != NULL | |
//Post - All documents containing term have been deleted. The number of deleted documents | |
// has been returned | |
CND_PRECONDITION(term != NULL, "term is NULL"); | |
//Search for the documents contain term | |
TermDocs* docs = termDocs(term); | |
//Check if documents have been found | |
if ( docs == NULL ){ | |
return 0; | |
} | |
//initialize | |
int32_t Counter = 0; | |
try { | |
//iterate through the found documents | |
while (docs->next()) { | |
//Delete the document | |
deleteDocument(docs->doc()); | |
++Counter; | |
} | |
}_CLFINALLY( | |
//Close the enumeration | |
docs->close(); | |
); | |
//Delete the enumeration of found documents | |
_CLDELETE( docs ); | |
//Return the number of deleted documents | |
return Counter; | |
} | |
TCHAR** IndexReader::getFieldNames() | |
{ | |
CL_NS(util)::StringArrayWithDeletor array; | |
getFieldNames(IndexReader::ALL, array); | |
array.setDoDelete(false); | |
TCHAR** ret = _CL_NEWARRAY(TCHAR*,array.size()+1); | |
int j=0; | |
CL_NS(util)::StringArrayWithDeletor::iterator itr = array.begin(); | |
while ( itr != array.end() ){ | |
ret[j]=*itr; | |
++j;++itr; | |
} | |
ret[j]=NULL; | |
return ret; | |
} | |
TCHAR** IndexReader::getFieldNames(bool indexed) | |
{ | |
CL_NS(util)::StringArrayWithDeletor array; | |
getFieldNames(indexed?IndexReader::INDEXED:IndexReader::UNINDEXED, array); | |
array.setDoDelete(false); | |
TCHAR** ret = _CL_NEWARRAY(TCHAR*,array.size()+1); | |
int j=0; | |
CL_NS(util)::StringArrayWithDeletor::iterator itr = array.begin(); | |
while ( itr != array.end() ){ | |
ret[j]=*itr; | |
++j;++itr; | |
} | |
ret[j]=NULL; | |
return ret; | |
} | |
void IndexReader::close() | |
{ | |
//Func - Closes files associated with this index and also saves any new deletions to disk. | |
// No other methods should be called after this has been called. | |
//Pre - true | |
//Post - All files associated with this index have been deleted and new deletions have been | |
// saved to disk | |
SCOPED_LOCK_MUTEX(THIS_LOCK) | |
CloseCallbackMap::iterator iter; | |
for (iter = closeCallbacks.begin(); iter != closeCallbacks.end(); iter++) { | |
CloseCallback callback = *iter->first; | |
callback(this, iter->second); | |
} | |
commit(); | |
doClose(); | |
if(closeDirectory) { | |
directory->close(); | |
_CLDECDELETE(directory); | |
} | |
} | |
bool IndexReader::isLocked(Directory* directory) | |
{ | |
//Func - Static method | |
// Checks if the index in the directory is currently locked. | |
//Pre - directory is a valid reference to a directory to check for a lock | |
//Post - Returns true if the index in the named directory is locked otherwise false | |
//Check the existence of the file write.lock and return true when it does and false | |
//when it doesn't | |
LuceneLock* l1 = directory->makeLock(QLatin1String("write.lock")); | |
LuceneLock* l2 = directory->makeLock(QLatin1String("commit.lock")); | |
bool ret = l1->isLocked() || l2->isLocked(); | |
_CLDELETE(l1); | |
_CLDELETE(l2); | |
return ret; | |
} | |
bool IndexReader::isLocked(const QString& directory) | |
{ | |
//Func - Static method | |
// Checks if the index in the named directory is currently locked. | |
//Pre - directory != NULL and contains the directory to check for a lock | |
//Post - Returns true if the index in the named directory is locked otherwise false | |
CND_PRECONDITION(!directory.isEmpty(), "directory is NULL"); | |
Directory* dir = FSDirectory::getDirectory(directory, false); | |
bool ret = isLocked(dir); | |
dir->close(); | |
_CLDECDELETE(dir); | |
return ret; | |
} | |
/** Returns true if there are norms stored for this field. */ | |
bool IndexReader::hasNorms(const TCHAR* field) | |
{ | |
// backward compatible implementation. | |
// SegmentReader has an efficient implementation. | |
return norms(field) != NULL; | |
} | |
void IndexReader::unlock(const QString& path) | |
{ | |
FSDirectory* dir = FSDirectory::getDirectory(path, false); | |
unlock(dir); | |
dir->close(); | |
_CLDECDELETE(dir); | |
} | |
void IndexReader::unlock(Directory* directory) | |
{ | |
//Func - Static method | |
// Forcibly unlocks the index in the named directory-> | |
// Caution: this should only be used by failure recovery code, | |
// when it is known that no other process nor thread is in fact | |
// currently accessing this index. | |
//Pre - directory is a valid reference to a directory | |
//Post - The directory has been forcibly unlocked | |
LuceneLock* lock; | |
lock = directory->makeLock(QLatin1String("write.lock")); | |
lock->release(); | |
_CLDELETE(lock); | |
lock = directory->makeLock(QLatin1String("commit.lock")); | |
lock->release(); | |
_CLDELETE(lock); | |
} | |
bool IndexReader::isLuceneFile(const QString& filename) | |
{ | |
if (filename.isNull() || filename.isEmpty()) | |
return false; | |
size_t len = filename.length(); | |
if (len < 6) //need at least x.frx | |
return false; | |
if (filename == QLatin1String("segments")) | |
return true; | |
if (filename == QLatin1String("segments.new")) | |
return true; | |
if (filename == QLatin1String("deletable")) | |
return true; | |
QStringList extList; | |
extList << QLatin1String(".cfs") | |
<< QLatin1String(".fnm") << QLatin1String(".fdx") << QLatin1String(".fdt") | |
<< QLatin1String(".tii") << QLatin1String(".tis") << QLatin1String(".frq") | |
<< QLatin1String(".prx") << QLatin1String(".del") << QLatin1String(".tvx") | |
<< QLatin1String(".tvd") << QLatin1String(".tvf") << QLatin1String(".tvp"); | |
QString suffix = filename.right(4); | |
if (extList.contains(suffix, Qt::CaseInsensitive)) | |
return true; | |
if (suffix.leftRef(2) == QLatin1String(".f")) { | |
suffix = suffix.remove(0, 2); | |
if (suffix.length() > 0) { | |
for (int i = 0; i < suffix.length(); ++i) { | |
if (!suffix.at(i).isDigit()) | |
return false; | |
} | |
return true; | |
} | |
} | |
return false; | |
} | |
void IndexReader::addCloseCallback(CloseCallback callback, void* parameter) | |
{ | |
closeCallbacks.put(callback, parameter); | |
} | |
// #pragma mark -- IndexReader::LockWith | |
IndexReader::LockWith::LockWith(CL_NS(store)::LuceneLock* lock, CL_NS(store)::Directory* dir) | |
: CL_NS(store)::LuceneLockWith<IndexReader*>(lock, IndexWriter::COMMIT_LOCK_TIMEOUT) | |
{ | |
this->directory = dir; | |
} | |
// #pragma mark -- IndexReader::CommitLockWith | |
IndexReader::CommitLockWith::CommitLockWith(CL_NS(store)::LuceneLock* lock, IndexReader* r) | |
: CL_NS(store)::LuceneLockWith<void>(lock,IndexWriter::COMMIT_LOCK_TIMEOUT) | |
, reader(r) | |
{ | |
} | |
void IndexReader::CommitLockWith::doBody() | |
{ | |
reader->doCommit(); | |
reader->segmentInfos->write(reader->getDirectory()); | |
} | |
CL_NS_END |