blob: 91c7356326ef408f2b7fa3e51245f2c9bfb1c8d6 [file] [log] [blame]
/*
* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
*
* Distributable under the terms of either the Apache License (Version 2.0) or
* the GNU Lesser General Public License, as specified in the COPYING file.
*
* Changes are Copyright(C) 2007, 2008 by Nokia Corporation and/or its subsidiary(-ies), all rights reserved.
*/
# include <QtCore/QFile>
# include <QtCore/QStringList>
#include "CLucene/StdHeader.h"
#include "IndexReader.h"
#include "IndexWriter.h"
#include "CLucene/store/Directory.h"
#include "CLucene/store/FSDirectory.h"
#include "CLucene/store/Lock.h"
#include "CLucene/document/Document.h"
#include "CLucene/search/Similarity.h"
#include "SegmentInfos.h"
#include "MultiReader.h"
#include "Terms.h"
CL_NS_USE(util)
CL_NS_USE(store)
CL_NS_DEF(index)
// Constructs a reader over `dir` that carries no SegmentInfos of its own.
// directoryOwner starts out false, so setNorm(), deleteDocument() and
// undeleteAll() skip aquireWriteLock(), and commit() calls doCommit() directly.
// closeDirectory starts out false, so close() leaves the directory untouched.
IndexReader::IndexReader(Directory* dir)
: stale(false)
, hasChanges(false)
, closeDirectory(false)
, directoryOwner(false)
, segmentInfos(NULL)
// NOTE(review): _CL_POINTER presumably takes a counted reference that the
// destructor's _CLDECDELETE gives back - confirm against the macro definition.
, directory(_CL_POINTER(dir))
, writeLock(NULL)
{
}
// Constructs a reader over `dir` that owns `infos`: the destructor deletes
// segmentInfos. directoryOwner is true, so modifying operations go through
// aquireWriteLock() and commit() writes segmentInfos under the commit lock.
// `close` is stored in closeDirectory and decides whether close() also closes
// and releases the directory.
IndexReader::IndexReader(Directory* dir, SegmentInfos* infos, bool close)
: stale(false)
, hasChanges(false)
, closeDirectory(close)
, directoryOwner(true)
, segmentInfos(infos)
// NOTE(review): _CL_POINTER presumably takes a counted reference that the
// destructor's _CLDECDELETE gives back - confirm against the macro definition.
, directory(_CL_POINTER(dir))
, writeLock(NULL)
{
}
IndexReader::~IndexReader()
{
    // A write lock that is still held is released before its object is deleted.
    if (writeLock) {
        writeLock->release();
        _CLDELETE(writeLock);
    }

    // Drop the segment infos, then give up this reader's hold on the directory.
    _CLDELETE(segmentInfos);
    _CLDECDELETE(directory);
}
IndexReader* IndexReader::open(const QString& path)
{
    // Func - Static method.
    //        Opens an IndexReader on the index stored in the FSDirectory at path.
    // Pre  - path is not empty and names the index location
    // Post - A reader for that index has been returned; it was opened with
    //        closeDirectory == true.
    CND_PRECONDITION(!path.isEmpty(), "path is NULL");

    Directory* fsDir = FSDirectory::getDirectory(path, false);
    IndexReader* const opened = open(fsDir, true);

    // getDirectory() left the FSDirectory with one reference more than if the
    // caller had supplied a directory object, so that extra reference is
    // dropped here.
    _CLDECDELETE(fsDir);

    return opened;
}
IndexReader* IndexReader::open(Directory* directory, bool closeDirectory)
{
    // Func - Static method.
    //        Opens an IndexReader on the index held by directory.
    // Pre  - directory is a valid directory
    //        closeDirectory says whether close() should also close the directory
    // Post - A reader for the index in directory has been returned

    // in- & inter-process sync
    SCOPED_LOCK_MUTEX(directory->THIS_LOCK)

    // The reader is built by a LockWith helper bound to the commit lock.
    LuceneLock* commitLock = directory->makeLock(QLatin1String("commit.lock"));
    IndexReader::LockWith opener(commitLock, directory);

    IndexReader* ret = NULL;
    try {
        ret = opener.runAndReturn();
    } _CLFINALLY (
        // the lock object is only needed while opening
        _CLDELETE(commitLock);
    );

    CND_CONDITION(ret != NULL, "ret is NULL");
    ret->closeDirectory = closeDirectory;
    return ret;
}
CL_NS(document)::Document* IndexReader::document(const int32_t n)
{
    // Allocates a fresh Document and asks document(n, doc) to fill it. If that
    // call reports failure, the Document is disposed of with _CLDELETE before
    // returning.
    // NOTE(review): _CLDELETE presumably also resets the pointer, so the
    // failure path returns NULL - confirm against the macro definition.
    CL_NS(document)::Document* doc = _CLNEW CL_NS(document)::Document;
    const bool filled = document(n, doc);
    if (!filled) {
        _CLDELETE(doc);
    }
    return doc;
}
IndexReader* IndexReader::LockWith::doBody()
{
    // Func - Reads the segment infos from directory and builds the matching reader.
    // Pre  - directory != NULL
    // Post - With exactly one segment a SegmentReader is returned; otherwise a
    //        MultiReader is returned, wrapping one SegmentReader per segment
    //        (or a NULL reader array when the index has no segments).
    CND_PRECONDITION(directory != NULL, "directory is NULL");

    SegmentInfos* infos = _CLNEW SegmentInfos;
    try {
        // load the segments file
        infos->read(directory);
    } catch (...) {
        // nothing has taken ownership of infos yet
        _CLDELETE(infos);
        throw;
    }

    const uint32_t nSegs = infos->size();

    // Exactly one segment: the index is optimized, a single SegmentReader suffices.
    if (nSegs == 1)
        return _CLNEW SegmentReader(infos, infos->info(0));

    // Otherwise build a NULL-terminated array with one SegmentReader per
    // segment. With zero segments the array stays NULL.
    IndexReader** readers = NULL;
    if (nSegs > 0) {
        readers = _CL_NEWARRAY(IndexReader*, nSegs + 1);
        uint32_t idx = 0;
        while (idx < nSegs) {
            readers[idx] = _CLNEW SegmentReader(infos->info(idx));
            ++idx;
        }
        readers[nSegs] = NULL;
    }

    // The MultiReader manages all segment readers.
    return _CLNEW MultiReader(directory, infos, readers);
}
uint64_t IndexReader::lastModified(const QString& directory)
{
    // Func - Static method
    //        Reports the modification time of the "segments" file in the named
    //        directory.
    // Pre  - directory is not empty and holds the path of the directory to check
    // Post - The value from FSDirectory::fileModified has been returned
    CND_PRECONDITION(!directory.isEmpty(), "directory is NULL");

    const QLatin1String segmentsFile("segments");
    return FSDirectory::fileModified(directory, segmentsFile);
}
int64_t IndexReader::getCurrentVersion(Directory* directory)
{
    // Func - Static method
    //        Reads the current index version from directory while trying to
    //        hold the commit lock.
    // Post - The version read by SegmentInfos::readCurrentVersion has been
    //        returned. The lock is released only if it was obtained, and the
    //        lock object is always deleted.

    // in- & inter-process sync
    SCOPED_LOCK_MUTEX(directory->THIS_LOCK)

    LuceneLock* lock = directory->makeLock(IndexWriter::COMMIT_LOCK_NAME);
    bool haveLock = false;
    int64_t version = 0;
    try {
        haveLock = lock->obtain(IndexWriter::COMMIT_LOCK_TIMEOUT);
        version = SegmentInfos::readCurrentVersion(directory);
    } _CLFINALLY (
        if (haveLock) {
            lock->release();
        }
        _CLDELETE(lock);
    )
    return version;
}
int64_t IndexReader::getCurrentVersion(const QString& directory)
{
    // Func - Static method
    //        Reads the current version of the index stored at the given path.
    // Pre  - directory holds the path of the index directory
    // Post - The version has been returned. The temporary directory handle is
    //        closed and released on every path, including when reading the
    //        version throws (the exception is then rethrown by _CLFINALLY).
    Directory* dir = FSDirectory::getDirectory(directory, false);
    int64_t version = 0;
    try {
        version = getCurrentVersion(dir);
    } _CLFINALLY (
        dir->close();
        _CLDECDELETE(dir);
    )
    return version;
}
int64_t IndexReader::getVersion()
{
    // Returns the version recorded in this reader's segmentInfos.
    const int64_t version = segmentInfos->getVersion();
    return version;
}
bool IndexReader::isCurrent()
{
// in- & inter-process sync
SCOPED_LOCK_MUTEX(directory->THIS_LOCK)
bool ret = false;
bool locked = false;
LuceneLock* commitLock = directory->makeLock(IndexWriter::COMMIT_LOCK_NAME);
try {
locked = commitLock->obtain(IndexWriter::COMMIT_LOCK_TIMEOUT);
ret = SegmentInfos::readCurrentVersion(directory)
== segmentInfos->getVersion();
} _CLFINALLY(
if (locked)
commitLock->release();
_CLDELETE(commitLock);
)
return ret;
}
uint64_t IndexReader::lastModified(const Directory* directory)
{
    // Func - Static method
    //        Reports the modification time of the "segments" file in directory.
    // Pre  - directory is a valid pointer
    // Post - The value from directory->fileModified has been returned
    const QLatin1String segmentsFile("segments");
    return directory->fileModified(segmentsFile);
}
bool IndexReader::indexExists(const QString& directory)
{
//Func - Static method
// Checks if an index exists in the named directory
//Pre - directory != NULL
//Post - Returns true if an index exists at the specified directory->
// If the directory does not exist or if there is no index in it.
// false is returned.
CND_PRECONDITION(!directory.isEmpty(), "directory is NULL");
return QFile(directory + QLatin1String("/segments")).exists();
}
void IndexReader::setNorm(int32_t doc, const TCHAR* field, uint8_t value)
{
    // Sets the encoded norm of `field` for document `doc` through doSetNorm()
    // and marks the reader as changed. A reader that owns its directory takes
    // the write lock first.
    SCOPED_LOCK_MUTEX(THIS_LOCK)

    if (directoryOwner) {
        aquireWriteLock();
    }

    doSetNorm(doc, field, value);
    hasChanges = true;
}
void IndexReader::aquireWriteLock()
{
if (stale) {
_CLTHROWA(CL_ERR_IO,
"IndexReader out of date and no longer valid for delete, "
"undelete, or setNorm operations");
}
if (writeLock == NULL) {
LuceneLock* writeLock = directory->makeLock(QLatin1String("write.lock"));
if (!writeLock->obtain(IndexWriter::WRITE_LOCK_TIMEOUT)) // obtain write lock
_CLTHROWA(CL_ERR_IO,"Index locked for write"); // + writeLock
this->writeLock = writeLock;
// we have to check whether index has changed since this reader was opened.
// if so, this reader is no longer valid for deletion
if (SegmentInfos::readCurrentVersion(directory) > segmentInfos->getVersion()) {
stale = true;
this->writeLock->release();
_CLDELETE(this->writeLock);
_CLTHROWA(CL_ERR_IO,"IndexReader out of date and no longer valid "
"for delete, undelete, or setNorm operations");
}
}
}
void IndexReader::setNorm(int32_t doc, const TCHAR* field, qreal value)
{
    // Encodes the floating-point norm with Similarity::encodeNorm and forwards
    // it to the uint8_t overload.
    const uint8_t encoded = CL_NS(search)::Similarity::encodeNorm(value);
    setNorm(doc, field, encoded);
}
bool IndexReader::indexExists(const Directory* directory)
{
//Func - Static method
// Checks if an index exists in the directory
//Pre - directory is a valid reference
//Post - Returns true if an index exists at the specified directory->
// If the directory does not exist or if there is no index in it.
// false is returned.
return directory->fileExists(QLatin1String("segments"));
}
TermDocs* IndexReader::termDocs(Term* term) const
{
    // Func - Returns an enumeration of the documents containing term, i.e.
    //        the mapping
    //
    //            Term => <docNum, freq>*
    //
    //        Besides the document number, the frequency of the term in each
    //        document is provided for use in search scoring. Entries are
    //        ordered by ascending document number.
    // Pre  - term != NULL
    // Post - A TermDocs positioned on term has been returned
    CND_PRECONDITION(term != NULL, "term is NULL");

    // Obtain an unpositioned enumeration and seek it to the requested term.
    TermDocs* enumeration = termDocs();
    enumeration->seek(term);
    return enumeration;
}
TermPositions* IndexReader::termPositions(Term* term) const
{
    // Func - Returns an enumeration of the documents containing term. On top
    //        of document number and term frequency, each entry lists the
    //        ordinal positions of the term inside the document, i.e. the mapping
    //
    //            Term => <docNum, freq,<pos 1, pos 2, ...pos freq-1>>*
    //
    //        The positions facilitate phrase and proximity searching. Entries
    //        are ordered by ascending document number.
    // Pre  - term != NULL
    // Post - A TermPositions positioned on term has been returned
    CND_PRECONDITION(term != NULL, "term is NULL");

    // Obtain an unpositioned enumeration and seek it to the requested term.
    TermPositions* enumeration = termPositions();
    enumeration->seek(term);
    return enumeration;
}
void IndexReader::deleteDocument(const int32_t docNum)
{
    // Func - Deletes the document numbered docNum. A deleted document no
    //        longer shows up in TermDocs or TermPositions enumerations, and
    //        reading its fields through document() results in an error. The
    //        docFreq statistic may still count it until the index is modified
    //        further.
    // Pre  - docNum >= 0
    // Post - The document has been deleted and the reader is marked as
    //        changed. If the write lock could not be taken, an exception has
    //        been thrown by aquireWriteLock().
    SCOPED_LOCK_MUTEX(THIS_LOCK)
    CND_PRECONDITION(docNum >= 0, "docNum is negative");

    // Only a reader that owns its directory needs the write lock.
    if (directoryOwner) {
        aquireWriteLock();
    }

    doDelete(docNum);
    hasChanges = true;
}
/**
* Commit changes resulting from delete, undeleteAll, or setNorm operations.
*
* When the reader owns its directory, the changes and the segment infos are
* written under the commit lock (see CommitLockWith::doBody) and the write
* lock is released afterwards; otherwise only doCommit() is called. Nothing
* is done when there are no pending changes.
*
* If committing throws, hasChanges stays set and a held write lock is kept.
*
* @throws IOException
*/
void IndexReader::commit()
{
    SCOPED_LOCK_MUTEX(THIS_LOCK)
    if (hasChanges) {
        if (directoryOwner) {
            {
                SCOPED_LOCK_MUTEX(directory->THIS_LOCK) // in- & inter-process sync
                LuceneLock* commitLock = directory->makeLock(QLatin1String("commit.lock"));
                IndexReader::CommitLockWith cl(commitLock, this);
                try {
                    cl.run();
                } _CLFINALLY (
                    // Free the lock object on the error path too; previously
                    // it leaked whenever run() threw.
                    _CLDELETE(commitLock);
                );
            }
            if (writeLock != NULL) {
                writeLock->release(); // release write lock
                _CLDELETE(writeLock);
            }
        } else {
            doCommit();
        }
    }
    hasChanges = false;
}
void IndexReader::undeleteAll()
{
    // Reverts all deletions through doUndeleteAll() and marks the reader as
    // changed. A reader that owns its directory takes the write lock first.
    SCOPED_LOCK_MUTEX(THIS_LOCK)

    if (directoryOwner) {
        aquireWriteLock();
    }

    doUndeleteAll();
    hasChanges = true;
}
int32_t IndexReader::deleteDocuments(Term* term)
{
    // Func - Deletes all documents containing term. This is useful when a
    //        document field holds a unique ID string: to delete such a
    //        document, construct a term from that field and the ID text and
    //        pass it to this method.
    // Pre  - term != NULL
    // Post - All documents containing term have been deleted and their number
    //        has been returned. If iterating or deleting throws, the
    //        enumeration is still closed and freed before the exception
    //        propagates.
    CND_PRECONDITION(term != NULL, "term is NULL");

    // Search for the documents that contain term
    TermDocs* docs = termDocs(term);
    if (docs == NULL)
        return 0;

    int32_t Counter = 0;
    try {
        // Delete every document the enumeration yields
        while (docs->next()) {
            deleteDocument(docs->doc());
            ++Counter;
        }
    } _CLFINALLY (
        // Close AND free the enumeration here. With the delete placed after
        // this block, the enumeration leaked on the rethrowing error path.
        docs->close();
        _CLDELETE(docs);
    );

    return Counter;
}
TCHAR** IndexReader::getFieldNames()
{
    // Collects the names of all fields (IndexReader::ALL) and hands them back
    // as a NULL-terminated array. setDoDelete(false) is called on the
    // temporary container before the strings are copied into the array.
    // NOTE(review): this presumably stops the container from freeing the
    // strings, leaving them to the caller - confirm against
    // StringArrayWithDeletor.
    CL_NS(util)::StringArrayWithDeletor names;
    getFieldNames(IndexReader::ALL, names);
    names.setDoDelete(false);

    TCHAR** result = _CL_NEWARRAY(TCHAR*, names.size() + 1);
    int count = 0;
    for (CL_NS(util)::StringArrayWithDeletor::iterator it = names.begin();
         it != names.end(); ++it) {
        result[count] = *it;
        ++count;
    }
    result[count] = NULL;
    return result;
}
TCHAR** IndexReader::getFieldNames(bool indexed)
{
    // Collects the names of either the indexed or the unindexed fields,
    // depending on `indexed`, and hands them back as a NULL-terminated array.
    // setDoDelete(false) is called on the temporary container before the
    // strings are copied into the array.
    // NOTE(review): this presumably stops the container from freeing the
    // strings, leaving them to the caller - confirm against
    // StringArrayWithDeletor.
    CL_NS(util)::StringArrayWithDeletor names;
    getFieldNames(indexed ? IndexReader::INDEXED : IndexReader::UNINDEXED, names);
    names.setDoDelete(false);

    TCHAR** result = _CL_NEWARRAY(TCHAR*, names.size() + 1);
    int count = 0;
    for (CL_NS(util)::StringArrayWithDeletor::iterator it = names.begin();
         it != names.end(); ++it) {
        result[count] = *it;
        ++count;
    }
    result[count] = NULL;
    return result;
}
void IndexReader::close()
{
//Func - Closes files associated with this index and also saves any new deletions to disk.
// No other methods should be called after this has been called.
//Pre - true
//Post - All files associated with this index have been deleted and new deletions have been
// saved to disk
SCOPED_LOCK_MUTEX(THIS_LOCK)
CloseCallbackMap::iterator iter;
for (iter = closeCallbacks.begin(); iter != closeCallbacks.end(); iter++) {
CloseCallback callback = *iter->first;
callback(this, iter->second);
}
commit();
doClose();
if(closeDirectory) {
directory->close();
_CLDECDELETE(directory);
}
}
bool IndexReader::isLocked(Directory* directory)
{
    // Func - Static method
    //        Tests whether the index in directory is currently locked.
    // Pre  - directory is a valid pointer to the directory to check
    // Post - true has been returned if either the "write.lock" or the
    //        "commit.lock" lock reports being locked, false otherwise.
    LuceneLock* forWrite = directory->makeLock(QLatin1String("write.lock"));
    LuceneLock* forCommit = directory->makeLock(QLatin1String("commit.lock"));

    // The commit lock is only queried when the write lock is free.
    bool held = forWrite->isLocked();
    if (!held) {
        held = forCommit->isLocked();
    }

    _CLDELETE(forWrite);
    _CLDELETE(forCommit);
    return held;
}
bool IndexReader::isLocked(const QString& directory)
{
//Func - Static method
// Checks if the index in the named directory is currently locked.
//Pre - directory != NULL and contains the directory to check for a lock
//Post - Returns true if the index in the named directory is locked otherwise false
CND_PRECONDITION(!directory.isEmpty(), "directory is NULL");
Directory* dir = FSDirectory::getDirectory(directory, false);
bool ret = isLocked(dir);
dir->close();
_CLDECDELETE(dir);
return ret;
}
/** Tells whether norms are stored for the given field. */
bool IndexReader::hasNorms(const TCHAR* field)
{
    // Backward compatible implementation: a field has norms when norms(field)
    // yields a non-NULL result. SegmentReader has an efficient implementation.
    if (norms(field) == NULL)
        return false;
    return true;
}
void IndexReader::unlock(const QString& path)
{
FSDirectory* dir = FSDirectory::getDirectory(path, false);
unlock(dir);
dir->close();
_CLDECDELETE(dir);
}
void IndexReader::unlock(Directory* directory)
{
    // Func - Static method
    //        Forcibly unlocks the index in directory.
    //        Caution: this should only be used by failure recovery code, when
    //        it is known that no other process or thread is currently
    //        accessing the index.
    // Pre  - directory is a valid pointer to a directory
    // Post - release() has been called on both the "write.lock" and the
    //        "commit.lock" lock of directory

    // write lock first ...
    LuceneLock* forWrite = directory->makeLock(QLatin1String("write.lock"));
    forWrite->release();
    _CLDELETE(forWrite);

    // ... then the commit lock
    LuceneLock* forCommit = directory->makeLock(QLatin1String("commit.lock"));
    forCommit->release();
    _CLDELETE(forCommit);
}
bool IndexReader::isLuceneFile(const QString& filename)
{
if (filename.isNull() || filename.isEmpty())
return false;
size_t len = filename.length();
if (len < 6) //need at least x.frx
return false;
if (filename == QLatin1String("segments"))
return true;
if (filename == QLatin1String("segments.new"))
return true;
if (filename == QLatin1String("deletable"))
return true;
QStringList extList;
extList << QLatin1String(".cfs")
<< QLatin1String(".fnm") << QLatin1String(".fdx") << QLatin1String(".fdt")
<< QLatin1String(".tii") << QLatin1String(".tis") << QLatin1String(".frq")
<< QLatin1String(".prx") << QLatin1String(".del") << QLatin1String(".tvx")
<< QLatin1String(".tvd") << QLatin1String(".tvf") << QLatin1String(".tvp");
QString suffix = filename.right(4);
if (extList.contains(suffix, Qt::CaseInsensitive))
return true;
if (suffix.leftRef(2) == QLatin1String(".f")) {
suffix = suffix.remove(0, 2);
if (suffix.length() > 0) {
for (int i = 0; i < suffix.length(); ++i) {
if (!suffix.at(i).isDigit())
return false;
}
return true;
}
}
return false;
}
void IndexReader::addCloseCallback(CloseCallback callback, void* parameter)
{
    // Registers callback together with its parameter in closeCallbacks;
    // close() invokes every registered callback with this reader and the
    // stored parameter.
    this->closeCallbacks.put(callback, parameter);
}
// #pragma mark -- IndexReader::LockWith
// Binds the helper to `lock`, using IndexWriter::COMMIT_LOCK_TIMEOUT as the
// timeout handed to the LuceneLockWith base, and remembers `dir` as the
// directory that doBody() reads the segment infos from.
IndexReader::LockWith::LockWith(CL_NS(store)::LuceneLock* lock, CL_NS(store)::Directory* dir)
: CL_NS(store)::LuceneLockWith<IndexReader*>(lock, IndexWriter::COMMIT_LOCK_TIMEOUT)
{
this->directory = dir;
}
// #pragma mark -- IndexReader::CommitLockWith
// Binds the helper to `lock`, using IndexWriter::COMMIT_LOCK_TIMEOUT as the
// timeout handed to the LuceneLockWith base, and remembers `r` as the reader
// whose changes doBody() commits.
IndexReader::CommitLockWith::CommitLockWith(CL_NS(store)::LuceneLock* lock, IndexReader* r)
: CL_NS(store)::LuceneLockWith<void>(lock,IndexWriter::COMMIT_LOCK_TIMEOUT)
, reader(r)
{
}
void IndexReader::CommitLockWith::doBody()
{
reader->doCommit();
reader->segmentInfos->write(reader->getDirectory());
}
CL_NS_END