blob: e3f9d1cc24f029cfa115d166b6e3fbdfff79ad06 [file] [log] [blame]
/*
* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
*
* Distributable under the terms of either the Apache License (Version 2.0) or
* the GNU Lesser General Public License, as specified in the COPYING file.
*
* Changes are Copyright(C) 2007, 2008 by Nokia Corporation and/or its subsidiary(-ies), all rights reserved.
*/
#include "CLucene/StdHeader.h"
#include "FieldsReader.h"
#include "CLucene/util/VoidMap.h"
#include "CLucene/util/Misc.h"
#include "CLucene/store/Directory.h"
#include "CLucene/document/Document.h"
#include "CLucene/document/Field.h"
#include "FieldInfos.h"
#include "FieldsWriter.h"
CL_NS_USE(store)
CL_NS_USE(document)
CL_NS_USE(util)
CL_NS_DEF(index)
FieldsReader::FieldsReader(Directory* d, const QString& segment, FieldInfos* fn)
    : fieldInfos(fn)
{
    //Func - Constructor. Opens the segment's ".fdt" (field data) and ".fdx"
    //       (field index) inputs and derives the document count from the
    //       length of the index input.
    //Pre  - d contains a valid reference to a Directory
    //       segment is not empty
    //       fn contains a valid reference to a FieldInfos
    //Post - The instance has been created. If opening or measuring a stream
    //       throws, any stream opened so far is closed before the exception
    //       is propagated.
    CND_PRECONDITION(!segment.isEmpty(), "segment != NULL");

    // Start from a known state so that close() can be used for cleanup below;
    // the destructor is not run when a constructor throws.
    fieldsStream = NULL;
    indexStream = NULL;

    try {
        QString buf = Misc::segmentname(segment, QLatin1String(".fdt"));
        fieldsStream = d->openInput(buf);

        buf = Misc::segmentname(segment, QLatin1String(".fdx"));
        indexStream = d->openInput(buf);

        // One 8-byte entry per document. Divide first, then narrow: casting
        // the length before dividing would truncate lengths that do not fit
        // into an int32_t.
        _size = (int32_t)(indexStream->length() / 8);
    } catch (...) {
        close();
        throw;
    }
}
FieldsReader::~FieldsReader()
{
    //Func - Destructor
    //Pre  - true
    //Post - Both streams have been released through close() and the
    //       instance has been destroyed
    this->close();
}
void FieldsReader::close()
{
//Func - Closes the FieldsReader
//Pre - true
//Post - The FieldsReader has been closed
if (fieldsStream) {
fieldsStream->close();
_CLDELETE(fieldsStream);
}
if(indexStream) {
indexStream->close();
_CLDELETE(indexStream);
}
}
int32_t FieldsReader::size() const
{
    //Func - Returns the number of 8-byte entries found in the index stream
    //       when this reader was constructed
    //Pre  - true
    //Post - The cached count has been returned
    return this->_size;
}
bool FieldsReader::doc(int32_t n, Document* doc)
{
if ( n * 8L > indexStream->length() )
return false;
indexStream->seek(n * 8L);
int64_t position = indexStream->readLong();
fieldsStream->seek(position);
int32_t numFields = fieldsStream->readVInt();
for (int32_t i = 0; i < numFields; i++) {
int32_t fieldNumber = fieldsStream->readVInt();
FieldInfo* fi = fieldInfos->fieldInfo(fieldNumber);
if ( fi == NULL )
_CLTHROWA(CL_ERR_IO, "Field stream is invalid");
uint8_t bits = fieldsStream->readByte();
if ((bits & FieldsWriter::FIELD_IS_BINARY) != 0) {
int32_t fieldLen = fieldsStream->readVInt();
FieldsReader::FieldsStreamHolder* subStream = new
FieldsReader::FieldsStreamHolder(fieldsStream, fieldLen);
uint8_t bits = Field::STORE_YES;
Field* f = _CLNEW Field(
fi->name, // name
subStream, // read value
bits);
doc->add(*f);
//now skip over the rest of the field
if (fieldsStream->getFilePointer() + fieldLen
== fieldsStream->length()) {
// set to eof
fieldsStream->seek(fieldsStream->getFilePointer() + fieldLen - 1);
fieldsStream->readByte();
} else {
fieldsStream->seek(fieldsStream->getFilePointer() + fieldLen);
}
} else {
uint8_t bits = Field::STORE_YES;
if (fi->isIndexed && (bits & FieldsWriter::FIELD_IS_TOKENIZED)!=0 )
bits |= Field::INDEX_TOKENIZED;
else if (fi->isIndexed && (bits & FieldsWriter::FIELD_IS_TOKENIZED) == 0)
bits |= Field::INDEX_UNTOKENIZED;
else
bits |= Field::INDEX_NO;
if (fi->storeTermVector) {
if (fi->storeOffsetWithTermVector) {
if (fi->storePositionWithTermVector) {
bits |= Field::TERMVECTOR_WITH_OFFSETS;
bits |= Field::TERMVECTOR_WITH_POSITIONS;
} else {
bits |= Field::TERMVECTOR_WITH_OFFSETS;
}
} else if (fi->storePositionWithTermVector) {
bits |= Field::TERMVECTOR_WITH_POSITIONS;
} else {
bits |= Field::TERMVECTOR_YES;
}
} else {
bits |= Field::TERMVECTOR_NO;
}
if ( (bits & FieldsWriter::FIELD_IS_COMPRESSED) != 0 ) {
bits |= Field::STORE_COMPRESS;
int32_t fieldLen = fieldsStream->readVInt();
FieldsStreamHolder* subStream = new
FieldsStreamHolder(fieldsStream, fieldLen);
// TODO: we dont have gzip inputstream available, must alert
// user to somehow use a gzip inputstream
Field* f = _CLNEW Field(
fi->name, // name
subStream, // read value
bits);
f->setOmitNorms(fi->omitNorms);
doc->add(*f);
// now skip over the rest of the field
if (fieldsStream->getFilePointer() + fieldLen
== fieldsStream->length()) {
//set to eof
fieldsStream->seek(fieldsStream->getFilePointer() + fieldLen - 1);
fieldsStream->readByte();
} else {
fieldsStream->seek(fieldsStream->getFilePointer() + fieldLen);
}
} else {
TCHAR* fvalue = fieldsStream->readString(true);
Field* f = _CLNEW Field(
fi->name, // name
fvalue, // read value
bits);
// TODO: could optimise this
_CLDELETE_CARRAY(fvalue);
f->setOmitNorms(fi->omitNorms);
doc->add(*f);
}
}
}
return true;
}
FieldsReader::FieldsStreamHolder::FieldsStreamHolder(IndexInput* indexInput,
    int32_t subLength)
{
    //Func - Constructor. Builds a sub-stream of subLength on top of a clone
    //       of indexInput
    //Pre  - indexInput points to a valid IndexInput
    //Post - The instance has been created and its stream state mirrors the
    //       freshly created sub-stream

    // The holder works on its own clone rather than on the caller's input.
    IndexInput* const ownInput = indexInput->clone();
    this->indexInput = ownInput;

    // Adapter over the clone, then the length-limited view on top of it.
    this->indexInputStream = new IndexInputStream(ownInput);
    this->subStream = new jstreams::SubInputStream<char>(
        this->indexInputStream, subLength);

    // Take over the initial state reported by the sub-stream.
    this->size = this->subStream->getSize();
    this->position = this->subStream->getPosition();
    this->error = this->subStream->getError();
    this->status = this->subStream->getStatus();
}
FieldsReader::FieldsStreamHolder::~FieldsStreamHolder()
{
//Func - Destructor
//Pre - true
//Post - The sub-stream, the stream adapter and the cloned input have been
// released
// Release in the reverse order of construction: the sub-stream was built on
// indexInputStream, which in turn was built on the cloned indexInput.
delete subStream;
delete indexInputStream;
// The clone made in the constructor is closed before it is deleted.
indexInput->close();
_CLDELETE(indexInput);
}
int32_t FieldsReader::FieldsStreamHolder::read(const char*& start, int32_t _min,
    int32_t _max)
{
    //Func - Forwards the read request to the wrapped sub-stream
    //Post - The sub-stream's return value has been returned and this
    //       holder's position, error and status match the sub-stream's
    const int32_t result = subStream->read(start, _min, _max);

    // Mirror the sub-stream's state after the call.
    position = subStream->getPosition();
    error = subStream->getError();
    status = subStream->getStatus();
    return result;
}
int64_t FieldsReader::FieldsStreamHolder::skip(int64_t ntoskip)
{
    //Func - Forwards the skip request to the wrapped sub-stream
    //Post - The sub-stream's return value has been returned and this
    //       holder's position, error and status match the sub-stream's
    const int64_t result = subStream->skip(ntoskip);

    // Mirror the sub-stream's state after the call.
    position = subStream->getPosition();
    error = subStream->getError();
    status = subStream->getStatus();
    return result;
}
int64_t FieldsReader::FieldsStreamHolder::reset(int64_t pos)
{
    //Func - Forwards the reset request to the wrapped sub-stream
    //Post - The sub-stream's return value has been returned and this
    //       holder's position, error and status match the sub-stream's
    const int64_t result = subStream->reset(pos);

    // Mirror the sub-stream's state after the call.
    position = subStream->getPosition();
    error = subStream->getError();
    status = subStream->getStatus();
    return result;
}
CL_NS_END