blob: 4a896a5c526be8e6ea9a99d26009880531d7c16a [file] [log] [blame]
/*------------------------------------------------------------------------------
* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
*
* Distributable under the terms of either the Apache License (Version 2.0) or
* the GNU Lesser General Public License, as specified in the COPYING file.
------------------------------------------------------------------------------*/
#ifndef _lucene_search_SearchHeader_
#define _lucene_search_SearchHeader_
#if defined(_LUCENE_PRAGMA_ONCE)
# pragma once
#endif
#include "CLucene/index/IndexReader.h"
#include "CLucene/index/Term.h"
#include "Filter.h"
#include "CLucene/document/Document.h"
#include "Sort.h"
#include "CLucene/util/VoidList.h"
#include "Explanation.h"
#include "Similarity.h"
CL_NS_DEF(search)
// Forward declarations of classes that are referenced in this header
// before (or without) their full definition being visible.
class Scorer;
class Query;
class Hits;
class Searcher; // Hits stores and takes a Searcher* ahead of Searcher's definition below
class Sort;
class FieldDoc;
class TopFieldDocs;
/** Expert: One hit as produced by the low-level search implementations.
 * @see TopDocs
 */
struct ScoreDoc {
    /** Expert: The number of the document that was hit.
     * @see Searcher#doc(int32_t)
     */
    int32_t doc;

    /** Expert: The score this document obtained for the query. */
    qreal score;
};
/** Expert: Result container handed back by low-level search implementations.
 * @see Searcher#search(Query,Filter,int32_t)
 */
class TopDocs : LUCENE_BASE {
public:
    /** Expert: Total number of hits for the query.
     * @see Hits#length()
     */
    int32_t totalHits;

    /** Expert: The top hits for the query. */
    ScoreDoc* scoreDocs;

    /** Presumably the number of entries in scoreDocs -- confirm against the constructor. */
    int32_t scoreDocsLength;

    /** Expert: Constructs a TopDocs.
     *
     * @memory The TopDocs takes ownership of the ScoreDoc array.
     *
     * NOTE(review): no copy constructor or assignment operator is declared
     * here, so the compiler-generated ones would share the owned array
     * between copies -- confirm that TopDocs objects are never copied.
     */
    TopDocs(const int32_t th, ScoreDoc* sds, int32_t scoreDocsLength);

    ~TopDocs();
};
/** Lower-level search API: a callback object that is handed matching documents.
 * @see Searcher#search(Query,HitCollector)
 */
class HitCollector : LUCENE_BASE {
public:
    /** Invoked once for each document that scores non-zero, receiving the
     * document number together with its score.
     *
     * <P>An application that wants to gather every hit of a query in a
     * BitSet could, for instance, do:<pre>
     *   Searcher searcher = new IndexSearcher(indexReader);
     *   final BitSet bits = new BitSet(indexReader.maxDoc());
     *   searcher.search(query, new HitCollector() {
     *       public void collect(int32_t doc, qreal score) {
     *         bits.set(doc);
     *       }
     *     });
     * </pre>
     *
     * <p>Note: This runs inside an inner search loop. To keep searches fast,
     * implementations should not call {@link Searcher#doc(int32_t)} or
     * {@link IndexReader#document(int32_t)} for every document number they
     * are given; doing so can slow searches by an order of magnitude or more.
     *
     * <p>Note: The <code>score</code> handed to this method is a raw score,
     * i.e. it is not necessarily a value between 0 and 1.
     */
    virtual void collect(const int32_t doc, const qreal score) = 0;

    virtual ~HitCollector() {}
};
/** Expert: Computes query weights and builds query scorers.
 *
 * <p>A Weight is created by a query for a given Searcher ({@link
 * Query#_createWeight(Searcher)}). {@link #sumOfSquaredWeights()} is then
 * called on the top-level query to compute the query normalization factor
 * ({@link Similarity#queryNorm(qreal)}). That factor is subsequently passed
 * to {@link #normalize(qreal)}. Weighting is complete at that point and a
 * scorer may be obtained through {@link #scorer(IndexReader)}.
 */
class Weight : LUCENE_BASE {
public:
    virtual ~Weight() {}

    /** Returns the query this weight concerns. */
    virtual Query* getQuery() = 0;

    /** Returns the weight for this query. */
    virtual qreal getValue() = 0;

    /** Returns the sum of squared weights of the contained query clauses. */
    virtual qreal sumOfSquaredWeights() = 0;

    /** Assigns the query normalization factor to this weight. */
    virtual void normalize(qreal norm) = 0;

    /** Constructs a scorer for this weight. */
    virtual Scorer* scorer(CL_NS(index)::IndexReader* reader) = 0;

    /** Writes an explanation of the score computation for the named
     * document into <code>ret</code>.
     */
    virtual void explain(CL_NS(index)::IndexReader* reader, int32_t doc, Explanation* ret) = 0;

    /** Returns a textual description of this weight.
     *
     * The default implementation returns the result of duplicating the
     * literal "Weight" with STRDUP_TtoT.
     * NOTE(review): presumably the caller must release the returned
     * string -- confirm against the STRDUP_TtoT definition.
     */
    virtual TCHAR* toString() {
        TCHAR* description = STRDUP_TtoT(_T("Weight"));
        return description;
    }
};
/** A single hit entry: score, document id and an optional Document pointer,
 * with links that chain entries into a doubly-linked cache.
 */
class HitDoc : LUCENE_BASE {
public:
    /** Score of this hit. */
    qreal score;

    /** Document id of this hit. */
    int32_t id;

    /** Document belonging to this hit.
     * NOTE(review): when it is populated and who releases it is decided in
     * the implementation file -- confirm there.
     */
    CL_NS(document)::Document* doc;

    /** Following entry in the doubly-linked cache. */
    HitDoc* next;

    /** Preceding entry in the doubly-linked cache. */
    HitDoc* prev;

    /** Creates a hit entry from a score <code>s</code> and a document id <code>i</code>. */
    HitDoc(const qreal s, const int32_t i);

    ~HitDoc();
};
/** A ranked list of documents, used to hold search results. */
class Hits : LUCENE_BASE {
private:
    Query* query;
    Searcher* searcher;
    Filter* filter;
    const Sort* sort;

    /** Total number of hits. */
    size_t _length;

    /** Cache of the hits retrieved so far. */
    CL_NS(util)::CLVector<HitDoc*, CL_NS(util)::Deletor::Object<HitDoc> > hitDocs;

    /** Head of the LRU cache. */
    HitDoc* first;

    /** Tail of the LRU cache. */
    HitDoc* last;

    /** Number of entries currently cached. */
    int32_t numDocs;

    /** Maximum number of entries to cache. */
    int32_t maxDocs;

public:
    /** Creates the hit list for query <code>q</code> run on searcher
     * <code>s</code> with filter <code>f</code> and an optional sort order.
     */
    Hits(Searcher* s, Query* q, Filter* f, const Sort* sort=NULL);

    ~Hits();

    /** Returns the total number of hits available in this set. */
    int32_t length() const;

    /** Returns the stored fields of the n<sup>th</sup> document in this set.
     * <p>Documents are cached, so asking for the same element repeatedly may
     * yield the same Document object.
     *
     * @memory Memory belongs to the hits object. Don't delete the return value.
     */
    CL_NS(document)::Document& doc(const int32_t n);

    /** Returns the id of the n<sup>th</sup> document in this set. */
    int32_t id (const int32_t n);

    /** Returns the score of the n<sup>th</sup> document in this set. */
    qreal score(const int32_t n);

private:
    /** Tries to add new documents to hitDocs.
     * Ensures that the hit numbered <code>_min</code> has been retrieved.
     */
    void getMoreDocs(const size_t _min);

    /** Returns the hit entry numbered <code>n</code>. */
    HitDoc* getHitDoc(const size_t n);

    /** Places <code>hitDoc</code> at the front of the cache list. */
    void addToFront(HitDoc* hitDoc);

    /** Takes <code>hitDoc</code> out of the cache list. */
    void remove(const HitDoc* hitDoc);
};
/** The interface every search implementation provides.
 *
 * <p>Implementations offer search over a single index, over multiple
 * indices, and over indices on remote servers.
 */
class Searchable : LUCENE_BASE {
public:
    virtual ~Searchable() {}

    /** Lower-level search API.
     *
     * <p>{@link HitCollector#collect(int32_t,qreal)} is invoked for every
     * document with a non-zero score.
     *
     * <p>Applications should use this only when they need <i>all</i> of the
     * matching documents. The high-level search API ({@link
     * Searcher#search(Query*)}) is usually more efficient because it skips
     * non-high-scoring hits.
     *
     * @param query to match documents
     * @param filter if non-null, a bitset used to eliminate some documents
     * @param results to receive hits
     */
    virtual void _search(Query* query, Filter* filter, HitCollector* results) = 0;

    /** Frees the resources associated with this Searcher.
     * Take care not to call this while objects such as {@link Hits} are
     * still in use.
     */
    virtual void close() = 0;

    /** Expert: Returns the number of documents containing <code>term</code>.
     * Called by search code to compute term weights.
     * @see IndexReader#docFreq(Term).
     */
    virtual int32_t docFreq(const CL_NS(index)::Term* term) const = 0;

    /** Expert: Returns one greater than the largest possible document number.
     * Called by search code to compute term weights.
     * @see IndexReader#maxDoc().
     */
    virtual int32_t maxDoc() const = 0;

    /** Expert: Low-level search implementation. Finds the top <code>n</code>
     * hits for <code>query</code>, applying <code>filter</code> if non-null.
     *
     * <p>Called by {@link Hits}.
     *
     * <p>Applications should usually call {@link Searcher#search(Query*)} or
     * {@link Searcher#search(Query*,Filter*)} instead.
     */
    virtual TopDocs* _search(Query* query, Filter* filter, const int32_t n) = 0;

    /** Expert: Retrieves the stored fields of document <code>i</code>.
     * Called by {@link HitCollector} implementations.
     * NOTE(review): presumably fills <code>d</code> and reports success via
     * the return value -- confirm against the implementations.
     * @see IndexReader#document(int32_t).
     */
    virtual bool doc(int32_t i, CL_NS(document)::Document* d) = 0;

    /** Deprecated variant; the deprecation notice points to doc(i, document). */
    _CL_DEPRECATED( doc(i, document) ) CL_NS(document)::Document* doc(const int32_t i);

    /** Expert: called to re-write queries into primitive queries. */
    virtual Query* rewrite(Query* query) = 0;

    /** Fills <code>ret</code> with an Explanation describing how
     * <code>doc</code> scored against <code>query</code>.
     *
     * <p>This is meant for developing Similarity implementations and, for
     * good performance, should not be displayed with every hit. Computing an
     * explanation is as expensive as executing the query over the entire
     * index.
     */
    virtual void explain(Query* query, int32_t doc, Explanation* ret) = 0;

    /** Expert: Low-level search implementation with arbitrary sorting. Finds
     * the top <code>n</code> hits for <code>query</code>, applying
     * <code>filter</code> if non-null, and sorting the hits by the criteria
     * in <code>sort</code>.
     *
     * <p>Applications should usually call {@link
     * Searcher#search(Query,Filter,Sort)} instead.
     */
    virtual TopFieldDocs* _search(Query* query, Filter* filter, const int32_t n, const Sort* sort) = 0;
};
/** An abstract base class for search implementations.
* Implements some common utility methods.
*/
class Searcher:public Searchable {
private:
/** The Similarity implementation used by this searcher. */
Similarity* similarity;
public:
Searcher(){
similarity = Similarity::getDefault();
}
virtual ~Searcher(){
}
// Returns the documents matching <code>query</code>.
Hits* search(Query* query) {
return search(query, (Filter*)NULL );
}
// Returns the documents matching <code>query</code> and
// <code>filter</code>.
Hits* search(Query* query, Filter* filter) {
return _CLNEW Hits(this, query, filter);
}
/** Returns documents matching <code>query</code> sorted by
* <code>sort</code>.
*/
Hits* search(Query* query, const Sort* sort){
return _CLNEW Hits(this, query, NULL, sort);
}
/** Returns documents matching <code>query</code> and <code>filter</code>,
* sorted by <code>sort</code>.
*/
Hits* search(Query* query, Filter* filter, const Sort* sort){
return _CLNEW Hits(this, query, filter, sort);
}
/** Lower-level search API.
*
* <p>{@link HitCollector#collect(int32_t ,qreal)} is called for every non-zero
* scoring document.
*
* <p>Applications should only use this if they need <i>all</i> of the
* matching documents. The high-level search API ({@link
* Searcher#search(Query*)}) is usually more efficient, as it skips
* non-high-scoring hits.
* <p>Note: The <code>score</code> passed to this method is a raw score.
* In other words, the score will not necessarily be a float whose value is
* between 0 and 1.
*/
void _search(Query* query, HitCollector* results) {
Searchable::_search(query, NULL, results);
}
/** Expert: Set the Similarity implementation used by this Searcher.
*
* @see Similarity#setDefault(Similarity)
*/
void setSimilarity(Similarity* similarity) {
this->similarity = similarity;
}
/** Expert: Return the Similarity implementation used by this Searcher.
*
* <p>This defaults to the current value of {@link Similarity#getDefault()}.
*/
Similarity* getSimilarity(){
return this->similarity;
}
};
/** The abstract base class for queries.
 * <p>Instantiable subclasses are:
 * <ul>
 * <li> {@link TermQuery}
 * <li> {@link MultiTermQuery}
 * <li> {@link BooleanQuery}
 * <li> {@link WildcardQuery}
 * <li> {@link PhraseQuery}
 * <li> {@link PrefixQuery}
 * <li> {@link PhrasePrefixQuery}
 * <li> {@link FuzzyQuery}
 * <li> {@link RangeQuery}
 * <li> {@link spans.SpanQuery}
 * </ul>
 * <p>A parser for queries is contained in:
 * <ul>
 * <li>{@link queryParser.QueryParser QueryParser}
 * </ul>
 */
class Query : LUCENE_BASE {
private:
    /** Query boost factor. */
    qreal boost;

protected:
    /** Copy constructor, available to subclasses only. */
    Query(const Query& clone);

public:
    Query();
    virtual ~Query();

    /** Sets the boost for this query clause to <code>b</code>. Documents
     * matching this clause will (in addition to the normal weightings) have
     * their score multiplied by <code>b</code>.
     */
    void setBoost(qreal b);

    /** Gets the boost for this clause. Documents matching this clause will
     * (in addition to the normal weightings) have their score multiplied by
     * the boost. The boost is 1.0 by default.
     */
    qreal getBoost() const;

    /** Expert: Constructs and initializes a Weight for a top-level query. */
    Weight* weight(Searcher* searcher);

    /** Expert: called to re-write queries into primitive queries. */
    virtual Query* rewrite(CL_NS(index)::IndexReader* reader);

    /** Expert: called when re-writing queries under MultiSearcher.
     *
     * <p>Only implemented by derived queries, with no
     * {@link #_createWeight(Searcher)} implementation.
     */
    virtual Query* combine(Query** queries);

    /** Expert: merges the clauses of a set of BooleanQuery's into a single
     * BooleanQuery.
     *
     * <p>A utility for use by {@link #combine(Query[])} implementations.
     */
    static Query* mergeBooleanQueries(Query** queries);

    /** Expert: Returns the Similarity implementation to be used for this
     * query. Subclasses may override this method to specify their own
     * Similarity implementation, perhaps one that delegates through that of
     * the Searcher. By default the Searcher's Similarity implementation is
     * returned.
     */
    Similarity* getSimilarity(Searcher* searcher);

    /** Returns a clone of this query. */
    virtual Query* clone() const = 0;

    /** Returns the name identifying the concrete query type. */
    virtual const TCHAR* getQueryName() const = 0;

    /** Tests this query's type against the query name <code>other</code>. */
    bool instanceOf(const TCHAR* other) const;

    /** Prints a query to a string, with <code>field</code> as the default
     * field for terms. <p>The representation used is one that is readable by
     * {@link queryParser.QueryParser QueryParser}
     * (although, if the query was created by the parser, the printed
     * representation may not be exactly what was parsed).
     */
    virtual TCHAR* toString(const TCHAR* field) const = 0;

    /** Compares this query with <code>other</code>. */
    virtual bool equals(Query* other) const = 0;

    /** Returns a hash code for this query. */
    virtual size_t hashCode() const = 0;

    /** Prints a query to a string. */
    TCHAR* toString() const;

    /** Expert: Constructs an appropriate Weight implementation for this
     * query.
     *
     * <p>Only implemented by primitive queries, which re-write to themselves.
     * <i>This is an internal function.</i>
     */
    virtual Weight* _createWeight(Searcher* searcher);
};
CL_NS_END
#endif