// blob: bed7f0d61c4230148e15ab0ef1b5d18446997ffd [file] [log] [blame]
/*------------------------------------------------------------------------------
* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
*
* Distributable under the terms of either the Apache License (Version 2.0) or
* the GNU Lesser General Public License, as specified in the COPYING file.
------------------------------------------------------------------------------*/
#include "CLucene/StdHeader.h"
#include "MultiSearcher.h"
#include "SearchHeader.h"
#include "HitQueue.h"
#include "CLucene/document/Document.h"
#include "CLucene/index/Term.h"
#include "FieldDocSortedHitQueue.h"
CL_NS_USE(index)
CL_NS_USE(util)
CL_NS_USE(document)
CL_NS_DEF(search)
/** Creates a searcher which searches <i>searchers</i>.
 *
 * @param _searchables NULL-terminated array of sub-searchers. The pointers
 *        are copied into an internal array; the array passed in is not kept.
 *
 * Also builds <code>starts</code>: starts[i] is the sum of maxDoc() of all
 * sub-searchers before i, and starts[searchablesLen] is the overall maxDoc.
 */
MultiSearcher::MultiSearcher(Searchable** _searchables):
    _maxDoc(0) {
    // count the entries up to (not including) the NULL terminator
    searchablesLen = 0;
    while ( _searchables[searchablesLen] != NULL )
        ++searchablesLen;

    searchables = _CL_NEWARRAY(Searchable*, searchablesLen + 1);
    starts = _CL_NEWARRAY(int32_t, searchablesLen + 1);   // build starts array
    for (int32_t i = 0; i < searchablesLen; ++i) {
        searchables[i] = _searchables[i];
        starts[i] = _maxDoc;
        _maxDoc += searchables[i]->maxDoc();              // compute maxDocs
    }
    // The extra slot was allocated but never written; terminate the copy
    // like the input so it never holds an indeterminate pointer.
    searchables[searchablesLen] = NULL;
    starts[searchablesLen] = _maxDoc;
}
/** Releases the two arrays allocated by the constructor. The sub-searchers
 * themselves are not deleted here. */
MultiSearcher::~MultiSearcher() {
    _CLDELETE_ARRAY(starts);        // per-searcher document offsets
    _CLDELETE_ARRAY(searchables);   // array of pointers only
}
// inherit javadoc
/** Closes every sub-searcher and clears this searcher's pointer to it.
 *
 * Slots that are already NULL (set by an earlier call to close()) are
 * skipped, so calling close() more than once does not dereference NULL.
 */
void MultiSearcher::close() {
    for (int32_t i = 0; i < searchablesLen; ++i){
        if ( searchables[i] == NULL )
            continue;               // already closed by a previous call
        searchables[i]->close();
        searchables[i] = NULL;
    }
}
/** Returns the sum of docFreq(term) over all sub-searchers. */
int32_t MultiSearcher::docFreq(const Term* term) const {
    int32_t total = 0;
    int32_t idx = 0;
    while (idx < searchablesLen) {
        total += searchables[idx]->docFreq(term);
        ++idx;
    }
    return total;
}
/** For use by {@link HitCollector} implementations.
 *
 * Locates the sub-searcher that holds document <code>n</code>, converts
 * <code>n</code> to that searcher's local numbering and forwards the call.
 */
bool MultiSearcher::doc(int32_t n, Document* d) {
    const int32_t owner = subSearcher(n);       // find searcher index
    const int32_t localDoc = n - starts[owner]; // number within that searcher
    return searchables[owner]->doc(localDoc, d);
}
/** Returns the same value as subSearcher(n). */
int32_t MultiSearcher::searcherIndex(int32_t n) const{
    const int32_t index = subSearcher(n);
    return index;
}
/** Returns index of the searcher for document <code>n</code> in the array
 * used to construct this searcher.
 *
 * Binary-searches <code>starts</code> for the last entry that is not
 * greater than <code>n</code>. When several consecutive entries equal
 * <code>n</code>, the highest such index is returned. If <code>n</code> is
 * smaller than starts[0] the result is -1.
 */
int32_t MultiSearcher::subSearcher(int32_t n) const{
    int32_t low = 0;
    int32_t high = searchablesLen - 1;
    while (low <= high) {
        // low and high are both non-negative here, so halving equals >> 1
        const int32_t probe = (low + high) / 2;
        const int32_t base = starts[probe];
        if (n == base) {
            // exact hit: advance past any following entries with the same start
            int32_t last = probe;
            while (last + 1 < searchablesLen && starts[last + 1] == base)
                ++last;
            return last;
        }
        if (n < base)
            high = probe - 1;
        else
            low = probe + 1;
    }
    // no exact match: high now indexes the last start below n
    return high;
}
/** Returns the document number of document <code>n</code> within its
 * sub-index, i.e. <code>n</code> minus the start offset of the searcher
 * chosen by subSearcher(n). */
int32_t MultiSearcher::subDoc(int32_t n) const{
    const int32_t owner = subSearcher(n);
    return n - starts[owner];
}
int32_t MultiSearcher::maxDoc() const{
return _maxDoc;
}
/** Runs the query on every sub-searcher and merges the per-searcher top
 * hits into a single TopDocs of at most <code>nDocs</code> entries.
 *
 * Document numbers in the result are shifted by the owning searcher's
 * entry in <code>starts</code>. The returned object is allocated with
 * _CLNEW.
 */
TopDocs* MultiSearcher::_search(Query* query, Filter* filter, const int32_t nDocs) {
    HitQueue* queue = _CLNEW HitQueue(nDocs);
    int32_t hitTotal = 0;

    for (int32_t s = 0; s < searchablesLen; ++s) {       // search each searcher
        TopDocs* partial = searchables[s]->_search(query, filter, nDocs);
        hitTotal += partial->totalHits;                   // update totalHits

        ScoreDoc* partialDocs = partial->scoreDocs;
        const int32_t base = starts[s];
        for (int32_t k = 0; k < partial->scoreDocsLength; ++k) { // merge into queue
            partialDocs[k].doc += base;                   // convert doc
            if ( !queue->insert(partialDocs[k]) )
                break;                                    // no more scores > minScore
        }
        _CLDELETE(partial);
    }

    // pop() results are stored from the last slot down to the first
    const int32_t mergedLen = queue->size();
    ScoreDoc* merged = _CL_NEWARRAY(ScoreDoc, mergedLen);
    int32_t slot = mergedLen;
    while (slot > 0) {
        --slot;
        merged[slot] = queue->pop();
    }

    //cleanup
    _CLDELETE(queue);
    return _CLNEW TopDocs(hitTotal, merged, mergedLen);
}
/** Lower-level search API.
 *
 * <p>{@link HitCollector#collect(int32_t,qreal)} is called for every non-zero
 * scoring document.
 *
 * <p>Applications should only use this if they need <i>all</i> of the
 * matching documents. The high-level search API ({@link
 * Searcher#search(Query)}) is usually more efficient, as it skips
 * non-high-scoring hits.
 *
 * @param query to match documents
 * @param filter if non-null, a bitset used to eliminate some documents
 * @param results to receive hits
 */
void MultiSearcher::_search(Query* query, Filter* filter, HitCollector* results){
    for (int32_t i = 0; i < searchablesLen; ++i) {
        /* DSR:CL_BUG: The original HitCollector ($results) must collect *all*
        ** hits; the MultiHitCollector below only shifts the docNo passed to
        ** $results forward by starts[i]. A fresh adjuster is used for each
        ** sub-searcher so the offsets are never applied cumulatively.
        **
        ** The adjuster is an automatic object rather than a _CLNEW/_CLDELETE
        ** pair, so it is destroyed even if the sub-searcher's _search throws. */
        MultiHitCollector docNoAdjuster(results, starts[i]);
        searchables[i]->_search(query, filter, &docNoAdjuster);
    }
}
/** Sorted search: runs the query on every sub-searcher with <code>sort</code>
 * and merges the per-searcher results into one TopFieldDocs of at most
 * <code>n</code> entries.
 *
 * The merge queue is created from the <code>fields</code> array of the first
 * sub-result and takes that array over; the array is handed to the returned
 * TopFieldDocs at the end. Document numbers are shifted by the owning
 * searcher's entry in <code>starts</code>.
 *
 * If there are no sub-searchers, an empty result (zero hits, NULL
 * <code>fields</code>) is returned instead of dereferencing the queue that
 * was never created.
 */
TopFieldDocs* MultiSearcher::_search (Query* query, Filter* filter, const int32_t n, const Sort* sort){
    FieldDocSortedHitQueue* hq = NULL;
    int32_t totalHits = 0;
    TopFieldDocs* docs;
    int32_t j;
    FieldDoc** fieldDocs;

    for (int32_t i = 0; i < searchablesLen; ++i) {      // search each searcher
        docs = searchables[i]->_search (query, filter, n, sort);
        if (hq == NULL){
            hq = _CLNEW FieldDocSortedHitQueue (docs->fields, n);
            docs->fields = NULL;                         //hit queue takes fields memory
        }
        totalHits += docs->totalHits;                    // update totalHits
        fieldDocs = docs->fieldDocs;
        for (j = 0; j < docs->scoreDocsLength; ++j){     // merge scoreDocs into hq
            fieldDocs[j]->scoreDoc.doc += starts[i];     // convert doc
            if (!hq->insert (fieldDocs[j]) )
                break;                                   // no more scores > minScore
        }
        // Entries [0, j) were accepted by the queue; clear them here so that
        // deleting docs below does not touch them. Entries from j on stay
        // with docs.
        for ( int32_t x = 0; x < j; ++x )
            fieldDocs[x] = NULL;                         //move ownership of FieldDoc to the hitqueue
        _CLDELETE(docs);
    }

    if (hq == NULL){
        // No sub-searcher was queried, so no queue (and no sort fields) exist.
        // Build the same shape the normal path yields for zero hits.
        fieldDocs = _CL_NEWARRAY(FieldDoc*, 0);
        return _CLNEW TopFieldDocs (0, fieldDocs, 0, NULL);
    }

    int32_t hqlen = hq->size();
    fieldDocs = _CL_NEWARRAY(FieldDoc*,hqlen);
    for (j = hqlen - 1; j >= 0; j--)                     // put docs in array
        fieldDocs[j] = hq->pop();

    SortField** hqFields = hq->getFields();
    hq->setFields(NULL);                                 //move ownership of memory over to TopFieldDocs
    _CLDELETE(hq);
    return _CLNEW TopFieldDocs (totalHits, fieldDocs, hqlen, hqFields);
}
/** Rewrites <code>original</code> against every sub-searcher, collects the
 * results in a NULL-terminated array and returns
 * <code>original->combine()</code> of that array.
 *
 * NOTE(review): the array is not freed here; presumably combine() takes
 * ownership of it — confirm against Query::combine.
 */
Query* MultiSearcher::rewrite(Query* original) {
    const int32_t count = searchablesLen;
    Query** rewritten = _CL_NEWARRAY(Query*, count + 1);
    rewritten[count] = NULL;                    // terminator
    for (int32_t s = 0; s < count; ++s) {
        rewritten[s] = searchables[s]->rewrite(original);
    }
    return original->combine(rewritten);
}
/** Forwards the explain request to the sub-searcher that holds
 * <code>doc</code>, using that searcher's local document number. The
 * explanation is written into <code>ret</code> by the sub-searcher. */
void MultiSearcher::explain(Query* query, int32_t doc, Explanation* ret) {
    const int32_t owner = subSearcher(doc);         // find searcher index
    const int32_t localDoc = doc - starts[owner];
    searchables[owner]->explain(query, localDoc, ret);
}
/** Stores the collector to forward to and the offset that collect() adds
 * to each document number.
 *
 * @param _results collector that receives the forwarded hits
 * @param _start value added to every document number
 */
MultiHitCollector::MultiHitCollector(HitCollector* _results, int32_t _start)
    : results(_results), start(_start)
{
}
/** Forwards the hit to the wrapped collector with <code>start</code> added
 * to the document number; the score is passed through unchanged. */
void MultiHitCollector::collect(const int32_t doc, const qreal score) {
    const int32_t shifted = doc + start;
    results->collect(shifted, score);
}
CL_NS_END