blob: ceb6735cbafad6a6a2ca03695397d80c94b917c6 [file] [log] [blame]
/*
* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
*
* Distributable under the terms of either the Apache License (Version 2.0) or
* the GNU Lesser General Public License, as specified in the COPYING file.
*
* Changes are Copyright(C) 2007, 2008 by Nokia Corporation and/or its subsidiary(-ies), all rights reserved.
*/
#include "CLucene/StdHeader.h"
#include "FieldsWriter.h"
#include "CLucene/util/VoidMap.h"
#include "CLucene/util/Reader.h"
#include "CLucene/util/Misc.h"
#include "CLucene/store/Directory.h"
#include "CLucene/store/IndexOutput.h"
#include "CLucene/document/Document.h"
#include "CLucene/document/Field.h"
#include "FieldInfos.h"
CL_NS_USE(store)
CL_NS_USE(util)
CL_NS_USE(document)
CL_NS_DEF(index)
FieldsWriter::FieldsWriter(Directory* d, const QString& segment, FieldInfos* fn)
: fieldInfos(fn)
{
//Func - Constructor
//Pre - d contains a valid reference to a directory
// segment != NULL and contains the name of the segment
//Post - fn contains a valid reference toa a FieldInfos
CND_PRECONDITION(!segment.isEmpty(), "segment is NULL");
QString buf = Misc::segmentname(segment, QLatin1String(".fdt"));
fieldsStream = d->createOutput(buf);
buf = Misc::segmentname(segment, QLatin1String(".fdx"));
indexStream = d->createOutput(buf);
CND_CONDITION(indexStream != NULL, "indexStream is NULL");
}
FieldsWriter::~FieldsWriter()
{
//Func - Destructor
//Pre - true
//Post - Instance has been destroyed
close();
}
void FieldsWriter::close()
{
//Func - Closes all streams and frees all resources
//Pre - true
//Post - All streams have been closed all resources have been freed
//Check if fieldsStream is valid
if (fieldsStream) {
//Close fieldsStream
fieldsStream->close();
_CLDELETE(fieldsStream);
}
//Check if indexStream is valid
if (indexStream) {
//Close indexStream
indexStream->close();
_CLDELETE(indexStream);
}
}
void FieldsWriter::addDocument(Document* doc)
{
//Func - Adds a document
//Pre - doc contains a valid reference to a Document
// indexStream != NULL
// fieldsStream != NULL
//Post - The document doc has been added
CND_PRECONDITION(indexStream != NULL, "indexStream is NULL");
CND_PRECONDITION(fieldsStream != NULL, "fieldsStream is NULL");
indexStream->writeLong(fieldsStream->getFilePointer());
int32_t storedCount = 0;
DocumentFieldEnumeration* fields = doc->fields();
while (fields->hasMoreElements()) {
Field* field = fields->nextElement();
if (field->isStored())
storedCount++;
}
_CLDELETE(fields);
fieldsStream->writeVInt(storedCount);
fields = doc->fields();
while (fields->hasMoreElements()) {
Field* field = fields->nextElement();
if (field->isStored()) {
fieldsStream->writeVInt(fieldInfos->fieldNumber(field->name()));
uint8_t bits = 0;
if (field->isTokenized())
bits |= FieldsWriter::FIELD_IS_TOKENIZED;
if (field->isBinary())
bits |= FieldsWriter::FIELD_IS_BINARY;
if (field->isCompressed())
bits |= FieldsWriter::FIELD_IS_COMPRESSED;
fieldsStream->writeByte(bits);
if ( field->isCompressed()) {
_CLTHROWA(CL_ERR_Runtime,
"CLucene does not directly support compressed fields. "
"Write a compressed byte array instead");
} else {
// FEATURE: this problem in Java Lucene too, if using Reader,
// data is not stored.
//
// TODO: this is a logic bug...
// if the field is stored, and indexed, and is using a reader
// the field wont get indexed
//
// if we could write zero prefixed vints (therefore static
// length), then we could write a reader directly to the field
// indexoutput and then go back and write the data length.
// however this is not supported in lucene yet...
// if this is ever implemented, then it would make sense to
// also be able to combine the FieldsWriter and
// DocumentWriter::invertDocument process, and use a
// streamfilter to write the field data while the documentwrite
// analyses the document! how cool would that be! it would cut
// out all these buffers!!!
// compression is disabled for the current field
if (field->isBinary()) {
// TODO: since we currently don't support static length vints,
// we have to read the entire stream into memory first.... ugly!
jstreams::StreamBase<char>* stream = field->streamValue();
const char* sd;
// how do we make sure we read the entire index in now???
// TODO: we need to have a max amount, and guarantee its all
// in or throw an error...
int32_t rl = stream->read(sd,10000000,0);
if ( rl < 0 ) {
// TODO: could we detect this earlier and not actually
// write the field??
fieldsStream->writeVInt(0);
} else {
// TODO: if this int could be written with a constant
// length, then the stream could be read and written a
// bit at a time then the length is re-written at the end.
fieldsStream->writeVInt(rl);
fieldsStream->writeBytes((uint8_t*)sd, rl);
}
} else if (field->stringValue() == NULL ) {
// we must be using readerValue
CND_PRECONDITION(!field->isIndexed(),
"Cannot store reader if it is indexed too")
Reader* r = field->readerValue();
//read the entire string
const TCHAR* rv;
int64_t rl = r->read(rv, LUCENE_INT32_MAX_SHOULDBE);
if ( rl > LUCENE_INT32_MAX_SHOULDBE )
_CLTHROWA(CL_ERR_Runtime, "Field length too long");
else if ( rl < 0 )
rl = 0;
fieldsStream->writeString( rv, (int32_t)rl);
} else if (field->stringValue() != NULL ) {
fieldsStream->writeString(field->stringValue(),
_tcslen(field->stringValue()));
} else {
_CLTHROWA(CL_ERR_Runtime, "No values are set for the field");
}
}
}
}
_CLDELETE(fields);
}
CL_NS_END