/*
 * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
 *
 * Distributable under the terms of either the Apache License (Version 2.0) or 
 * the GNU Lesser General Public License, as specified in the COPYING file.
 *
 * Changes are Copyright(C) 2007, 2008 by Nokia Corporation and/or its subsidiary(-ies), all rights reserved.
*/
#include "CLucene/StdHeader.h"
#include "TermInfosReader.h"

#include "CLucene/store/Directory.h"
#include "CLucene/util/Misc.h"
#include "FieldInfos.h"
#include "Term.h"
#include "Terms.h"
#include "TermInfo.h"
#include "TermInfosWriter.h"

CL_NS_USE(store)
CL_NS_USE(util)
CL_NS_DEF(index)

TermInfosReader::TermInfosReader(Directory* dir, const QString& seg,
    FieldInfos* fis)
    : directory(dir)
    , fieldInfos (fis)
{
    //Func - Constructor.
    //       Opens the Term Infos file (.tis) and the Term Info Index file
    //       (.tii) of a segment and wraps each in a SegmentTermEnum. The
    //       index itself is not loaded here; ensureIndexIsRead() does that
    //       on first use.
    //Pre  - dir points to a valid Directory
    //       fis points to a valid FieldInfos instance
    //       seg is not empty and contains the name of the segment
    //Post - An instance has been created for the segment named seg. If
    //       opening either file throws, everything opened so far has been
    //       released through close() and the exception has been rethrown.

    CND_PRECONDITION(!seg.isEmpty(), "seg is NULL");

    //Initialize the name of the segment
    segment       = seg;
    //The in-memory index (terms, infos and pointers) has not been read yet
    indexTerms    = NULL;
    indexInfos    = NULL;
    indexPointers = NULL;
    //Both enumerators must be NULL before anything below can throw, because
    //close() inspects them to decide what has to be released
    origEnum      = NULL;
    indexEnum     = NULL;
    _size         = 0;

    //Create the file names of the Term Infos file and its index
    QString tisFile = Misc::segmentname(segment, QLatin1String(".tis"));
    QString tiiFile = Misc::segmentname(segment, QLatin1String(".tii"));

    try {
        //Enumerator over all the terms of the segment
        origEnum = _CLNEW SegmentTermEnum( directory->openInput( tisFile ), fieldInfos, false);
        //Enumerator over the term index of the segment
        indexEnum = _CLNEW SegmentTermEnum( directory->openInput( tiiFile ), fieldInfos, true);

        //Check if the enumerators point to valid instances
        CND_CONDITION(origEnum != NULL, "No memory could be allocated for orig enumerator");
        CND_CONDITION(indexEnum != NULL, "No memory could be allocated for index enumerator");

        //Get the size of the enumeration and store it in size
        _size = origEnum->size;
    } catch (...) {
        //The destructor is not run for a partially constructed object, so
        //release whatever was opened before propagating the failure.
        //NOTE(review): an IndexInput whose SegmentTermEnum constructor throws
        //is still not reclaimed here - confirm whether that can happen.
        close();
        throw;
    }
}

TermInfosReader::~TermInfosReader()
{
    //Func - Destructor
    //Pre  - true
    //Post - The instance has been destroyed

    //All clean-up is delegated to close(): it closes and deletes both
    //enumerators together with their inputs, and frees the indexTerms,
    //indexInfos and indexPointers arrays if they were ever allocated.
    close();
}

void TermInfosReader::close()
{
    //Func - Close the enumeration of TermInfos
    //Pre  - true
    //Post - The _enumeration has been closed and the arrays

    //Check if indexTerms and indexInfos exist
    if (indexTerms && indexInfos){
        //Iterate through arrays indexTerms and indexPointer to
        //destroy their elements
#ifdef _DEBUG
        for (int32_t i = 0; i < indexTermsLength; ++i) {
            if (indexTerms[i].__cl_refcount != 1) {
                CND_PRECONDITION(indexTerms[i].__cl_refcount == 1,
                    "TermInfosReader term was references more than internally");
            }
            //   _CLDECDELETE(indexTerms[i]);
            //_CLDELETE(indexInfos[i]);
        }
#endif
        //Delete the arrays
        _CLDELETE_ARRAY(indexTerms);
        _CLDELETE_ARRAY(indexInfos);
    }

    //Delete the arrays
    _CLDELETE_ARRAY(indexPointers);

    if (origEnum != NULL) {
        origEnum->close();

        //Get a pointer to IndexInput used by the enumeration but 
        //instantiated in the constructor by directory.open( tisFile )
        IndexInput *is = origEnum->input;

        //Delete the enumuration enumerator
        _CLDELETE(origEnum);

        //Delete the IndexInput 
        _CLDELETE(is);	
    }

    if (indexEnum != NULL){
        indexEnum->close();

        //Get a pointer to IndexInput used by the enumeration but 
        //instantiated in the constructor by directory.open( tiiFile )
        IndexInput *is = indexEnum->input;

        //Delete the enumuration enumerator
        _CLDELETE(indexEnum);

        //Delete the IndexInput 
        _CLDELETE(is);	
    }
}

int64_t TermInfosReader::size() const
{
    //Func - Reports how many terms the enumeration of TermInfos holds
    //Pre  - true
    //Post - The size stored by the constructor has been returned

    return _size;
}

Term* TermInfosReader::get(const int32_t position)
{
    //Func - Returns the nth term in the set
    //Pre  - position >= 0
    //Post - The n-th term in the set has been returned, or NULL if the set
    //       is empty, position lies outside [0, size) or no enumerator
    //       could be obtained

    //Check if the size is 0 because then there are no terms
    if (_size == 0)
        return NULL;

    //A position outside the set can never yield a term. Rejecting it here
    //also keeps the index offset computed below inside the index arrays.
    if (position < 0 || position >= _size)
        return NULL;

    SegmentTermEnum* enumerator = getEnum();
    if (enumerator == NULL)
        return NULL;

    if (enumerator->term(false) != NULL // term is at or past current
        && position >= enumerator->position
        && position < (enumerator->position + enumerator->indexInterval)) {
        return scanEnum(position);			  // can avoid seek
    }

    //random-access: must seek. seekEnum() reads indexTerms, indexInfos and
    //indexPointers, so the index has to be loaded first.
    ensureIndexIsRead();
    seekEnum(position / enumerator->indexInterval);

    //Get the Term at position
    return scanEnum(position);
}

// TODO: there is currently no way to clean up after a thread that has ended;
// the enumerator created for that thread stays around. Hopefully this won't
// be too big a problem... solutions anyone?
SegmentTermEnum* TermInfosReader::getEnum()
{
    //Func - Returns the enumerator stored in `enumerators`, creating and
    //       storing one through terms() the first time it is asked for
    //Pre  - true
    //Post - The stored enumerator has been returned

    SegmentTermEnum* cached = enumerators.get();
    if (cached != NULL)
        return cached;

    //Nothing stored yet: create an enumerator and remember it
    SegmentTermEnum* created = terms();
    enumerators.set(created);
    return created;
}

TermInfo* TermInfosReader::get(const Term* term)
{
    //Func - Looks up the TermInfo that belongs to a term
    //Pre  - term holds a valid reference to a term
    //Post - The TermInfo of term has been returned if term could be found,
    //       otherwise NULL

    //An empty enumeration cannot contain the term
    if (_size == 0)
        return NULL;

    ensureIndexIsRead();

    SegmentTermEnum* cursor = getEnum();

    //Optimize sequential access: try to scan forward from where the cached
    //enumerator currently is, without seeking first. That is only possible
    //when the enumerator is not at its end...
    if (cursor->term(false) != NULL) {
        //...and term lies after the enumerator's previous term, or at or
        //after its current term...
        const bool afterPrevious =
            cursor->prev != NULL && term->compareTo(cursor->prev) > 0;

        if (afterPrevious || term->compareTo(cursor->term(false)) >= 0) {
            //...and term lies before the start of the next index block.
            //nextBlock is the index entry following the enumerator's block.
            const int32_t nextBlock = (int32_t)
                (cursor->position / cursor->indexInterval) + 1;

            //Either there is no next block, or term sorts before its first
            //term: no need to seek
            if (indexTermsLength == nextBlock
                || term->compareTo(&indexTerms[nextBlock]) < 0) {
                return scanEnum(term);
            }
        }
    }

    //Random access: position the enumerator on the index entry at or before
    //term, then scan forward from there
    seekEnum(getIndexOffset(term));
    return scanEnum(term);
}

int64_t TermInfosReader::getPosition(const Term* term)
{
    //Func - Returns the position of a Term in the set
    //Pre  - term holds a valid reference to a Term
    //       enumerator != NULL
    //Post - If term was found then its position is returned otherwise -1

    //if the enumeration is empty then return -1
    if (_size == 0)
        return -1;

    ensureIndexIsRead();

    //Retrieve the indexOffset for term
    int32_t indexOffset = getIndexOffset(term);
    seekEnum(indexOffset);

    SegmentTermEnum* enumerator = getEnum();

    while(term->compareTo(enumerator->term(false)) > 0 && enumerator->next()) {}

    if (term->equals(enumerator->term(false)))
        return enumerator->position;

    return -1;
}

SegmentTermEnum* TermInfosReader::terms(const Term* term)
{
    //Func - Returns an enumeration of terms starting at or after the named
    //       term. If term is NULL the enumeration is a clone of origEnum.
    //Pre  - term is NULL or holds a valid reference to a Term
    //Post - A clone of the selected enumerator has been returned

    //Without a term the clone is taken from the original enumerator
    SegmentTermEnum* source = origEnum;

    if (term != NULL) {
        //Looking the term up positions the stored enumerator on it; the
        //TermInfo that get() returns is not needed and is deleted again
        TermInfo* unused = get(term);
        _CLDELETE(unused);
        source = getEnum();
    }

    //Hand out a clone so the caller gets an enumerator of its own
    SegmentTermEnum* copy = source->clone();

    //Check if the clone points to a valid instance
    CND_CONDITION(copy != NULL, "cln is NULL");

    return copy;
}

void TermInfosReader::ensureIndexIsRead()
{
    //Func - Reads the term info index file (.tii) into memory, once.
    //       This file contains every IndexInterval-th entry from the .tis
    //       file, along with its location in the "tis" file. This is designed
    //       to be read entirely into memory and used to provide random access
    //       to the "tis" file.
    //Pre  - true (if indexTerms is already set the call returns at once)
    //Post - indexTerms, indexInfos and indexPointers hold the contents of the
    //       term info index file; indexEnum and its input have been closed
    //       and deleted

    // NOTE(review): presumably holds THIS_LOCK until the end of the function
    // so that the check and the load below are not interleaved between
    // threads - confirm against the macro definition
    SCOPED_LOCK_MUTEX(THIS_LOCK)

    // The index has already been read
    if ( indexTerms != NULL )
        return;

    try {
        // Number of entries in the index; all three arrays below get this size
        indexTermsLength = (size_t)indexEnum->size;

        // Instantiate a block of Terms, so that each one doesn't have to be new'd
        indexTerms    = _CL_NEWARRAY(Term,indexTermsLength);

        // Check if indexTerms is a valid array
        CND_CONDITION(indexTerms != NULL,
            "No memory could be allocated for indexTerms");

        // Instantiate a big block of TermInfos, so that each one doesn't
        // have to be new'd
        indexInfos = _CL_NEWARRAY(TermInfo,indexTermsLength);

        // Check if indexInfos is a valid array
        CND_CONDITION(indexInfos != NULL,
            "No memory could be allocated for indexInfos");

        // Instantiate the array indexPointers; seekEnum() later passes these
        // values to SegmentTermEnum::seek()
        indexPointers = _CL_NEWARRAY(int64_t,indexTermsLength);

        // Check if indexPointers is a valid array
        CND_CONDITION(indexPointers != NULL,
            "No memory could be allocated for indexPointers");

        // Iterate through the terms of indexEnum and copy the i-th entry into
        // slot i of each array
        for (int32_t i = 0; indexEnum->next(); ++i) {
            indexTerms[i].set(indexEnum->term(false), indexEnum->term(false)->text());
            indexEnum->getTermInfo(&indexInfos[i]);
            indexPointers[i] = indexEnum->indexPointer;
        }
    } _CLFINALLY (
        // NOTE(review): presumably this clean-up runs both after a normal
        // pass and when an exception escapes the try block - confirm against
        // the _CLFINALLY definition. The index enumerator is no longer needed
        // once its contents are in memory.
        indexEnum->close(); 
        // Delete the IndexInput first, then the enumerator that was reading
        // from it.
        _CLDELETE( indexEnum->input );
        _CLDELETE( indexEnum ); 
    );
}

int32_t TermInfosReader::getIndexOffset(const Term* term)
{
    //Func - Binary search over indexTerms for the offset of the greatest
    //       index entry which is less than or equal to term.
    //Pre  - term holds a reference to a valid term
    //       indexTerms != NULL
    //Post - The offset has been returned

    //The index must have been loaded
    CND_PRECONDITION(indexTerms != NULL, "indexTerms is NULL");

    int32_t low  = 0;
    int32_t high = indexTermsLength - 1;

    while (low <= high) {
        //Probe the entry halfway between low and high
        const int32_t probe = (low + high) >> 1;

        //Check that the probed entry is a valid one
        CND_PRECONDITION(&indexTerms[probe] != NULL, "indexTerms[mid] is NULL");
        CND_PRECONDITION(probe < indexTermsLength, "mid >= indexTermsLength");

        const int32_t order = term->compareTo(&indexTerms[probe]);

        //Exact hit: the probed entry is the one asked for
        if (order == 0)
            return probe;

        if (order < 0) {
            //term sorts before the probed entry: continue in the lower half
            high = probe - 1;
        } else {
            //term sorts after the probed entry: continue in the upper half
            low = probe + 1;
        }
    }

    //No exact match: high is the last entry that sorts before term
    return high;
}

void TermInfosReader::seekEnum(const int32_t indexOffset)
{
    //Func - Reposition the current Term and TermInfo to indexOffset
    //Pre  - indexOffset >= 0
    //       indexTerms    != NULL
    //       indexInfos    != NULL
    //       indexPointers != NULL
    //Post - The current Term and Terminfo have been repositioned to indexOffset

    CND_PRECONDITION(indexOffset >= 0, "indexOffset contains a negative number");
    CND_PRECONDITION(indexTerms != NULL, "indexTerms is NULL");
    CND_PRECONDITION(indexInfos != NULL, "indexInfos is NULL");
    CND_PRECONDITION(indexPointers != NULL, "indexPointers is NULL");

    SegmentTermEnum* enumerator =  getEnum();
    enumerator->seek(indexPointers[indexOffset],
        (indexOffset * enumerator->indexInterval) - 1,
        &indexTerms[indexOffset], &indexInfos[indexOffset]);
}

TermInfo* TermInfosReader::scanEnum(const Term* term)
{
    //Func - Scans the stored enumerator forward to term, starting from its
    //       current term, and returns the matching TermInfo
    //Pre  - term contains a valid reference to a Term
    //Post - The TermInfo obtained from the enumerator has been returned if
    //       the enumerator ended up on term, otherwise NULL

    SegmentTermEnum* cursor = getEnum();
    cursor->scanTo(term);

    //The enumerator has no current term: nothing was found
    if (cursor->term(false) == NULL)
        return NULL;

    //The enumerator stopped on a different term: nothing was found
    if (!term->equals(cursor->term(false)))
        return NULL;

    //The enumerator is positioned on term, hand out its TermInfo
    return cursor->getTermInfo();
}

Term* TermInfosReader::scanEnum(const int32_t position)
{
    //Func - Steps the stored enumerator forward until it has reached the
    //       requested position and returns the Term found there
    //Pre  - position >= 0
    //Post - The Term at the requested position has been returned, or NULL
    //       if the enumeration ended before reaching it

    SegmentTermEnum* cursor = getEnum();

    //Keep stepping while the enumerator is still before the requested position
    while (cursor->position < position) {
        const bool advanced = cursor->next();

        //No next term: the requested position was too big
        if (!advanced)
            return NULL;
    }

    //The enumerator is at (or already past) the requested position
    return cursor->term();
}

CL_NS_END
