/*------------------------------------------------------------------------------ | |
* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team | |
* | |
* Distributable under the terms of either the Apache License (Version 2.0) or | |
* the GNU Lesser General Public License, as specified in the COPYING file. | |
------------------------------------------------------------------------------*/ | |
#ifndef _lucene_search_SearchHeader_ | |
#define _lucene_search_SearchHeader_ | |
#if defined(_LUCENE_PRAGMA_ONCE) | |
# pragma once | |
#endif | |
#include "CLucene/index/IndexReader.h" | |
#include "CLucene/index/Term.h" | |
#include "Filter.h" | |
#include "CLucene/document/Document.h" | |
#include "Sort.h" | |
#include "CLucene/util/VoidList.h" | |
#include "Explanation.h" | |
#include "Similarity.h" | |
CL_NS_DEF(search) | |
//predefine classes | |
class Scorer; | |
class Query; | |
class Hits; | |
class Sort; | |
class FieldDoc; | |
class TopFieldDocs; | |
/** Expert: Returned by low-level search implementations. | |
* @see TopDocs */ | |
struct ScoreDoc { | |
/** Expert: A hit document's number. | |
* @see Searcher#doc(int32_t) | |
*/ | |
int32_t doc; | |
/** Expert: The score of this document for the query. */ | |
qreal score; | |
}; | |
/** Expert: Returned by low-level search implementations. | |
* @see Searcher#search(Query,Filter,int32_t) */ | |
class TopDocs:LUCENE_BASE { | |
public: | |
/** Expert: The total number of hits for the query. | |
* @see Hits#length() | |
*/ | |
int32_t totalHits; | |
/** Expert: The top hits for the query. */ | |
ScoreDoc* scoreDocs; | |
int32_t scoreDocsLength; | |
/** Expert: Constructs a TopDocs. TopDocs takes ownership of the ScoreDoc array*/ | |
TopDocs(const int32_t th, ScoreDoc* sds, int32_t scoreDocsLength); | |
~TopDocs(); | |
}; | |
// Lower-level search API. | |
// @see Searcher#search(Query,HitCollector) | |
class HitCollector: LUCENE_BASE { | |
public: | |
/** Called once for every non-zero scoring document, with the document number | |
* and its score. | |
* | |
* <P>If, for example, an application wished to collect all of the hits for a | |
* query in a BitSet, then it might:<pre> | |
* Searcher searcher = new IndexSearcher(indexReader); | |
* final BitSet bits = new BitSet(indexReader.maxDoc()); | |
* searcher.search(query, new HitCollector() { | |
* public void collect(int32_t doc, float score) { | |
* bits.set(doc); | |
* } | |
* }); | |
* </pre> | |
* | |
* <p>Note: This is called in an inner search loop. For good search | |
* performance, implementations of this method should not call | |
* {@link Searcher#doc(int32_t)} or | |
* {@link IndexReader#document(int32_t)} on every | |
* document number encountered. Doing so can slow searches by an order | |
* of magnitude or more. | |
* <p>Note: The <code>score</code> passed to this method is a raw score. | |
* In other words, the score will not necessarily be a float whose value is | |
* between 0 and 1. | |
*/ | |
virtual void collect(const int32_t doc, const qreal score) = 0; | |
virtual ~HitCollector(){} | |
}; | |
/** Expert: Calculate query weights and build query scorers. | |
* | |
* <p>A Weight is constructed by a query, given a Searcher ({@link | |
* Query#_createWeight(Searcher)}). The {@link #sumOfSquaredWeights()} method | |
* is then called on the top-level query to compute the query normalization | |
* factor (@link Similarity#queryNorm(qreal)}). This factor is then passed to | |
* {@link #normalize(qreal)}. At this point the weighting is complete and a | |
* scorer may be constructed by calling {@link #scorer(IndexReader)}. | |
*/ | |
class Weight: LUCENE_BASE { | |
public: | |
virtual ~Weight(){ | |
}; | |
/** The query that this concerns. */ | |
virtual Query* getQuery() = 0; | |
/** The weight for this query. */ | |
virtual qreal getValue() = 0; | |
/** The sum of squared weights of contained query clauses. */ | |
virtual qreal sumOfSquaredWeights() = 0; | |
/** Assigns the query normalization factor to this. */ | |
virtual void normalize(qreal norm) = 0; | |
/** Constructs a scorer for this. */ | |
virtual Scorer* scorer(CL_NS(index)::IndexReader* reader) = 0; | |
/** An explanation of the score computation for the named document. */ | |
virtual void explain(CL_NS(index)::IndexReader* reader, int32_t doc, Explanation* ret) = 0; | |
virtual TCHAR* toString(){ | |
return STRDUP_TtoT(_T("Weight")); | |
} | |
}; | |
class HitDoc:LUCENE_BASE { | |
public: | |
qreal score; | |
int32_t id; | |
CL_NS(document)::Document* doc; | |
HitDoc* next; // in doubly-linked cache | |
HitDoc* prev; // in doubly-linked cache | |
HitDoc(const qreal s, const int32_t i); | |
~HitDoc(); | |
}; | |
// A ranked list of documents, used to hold search results. | |
class Hits:LUCENE_BASE { | |
private: | |
Query* query; | |
Searcher* searcher; | |
Filter* filter; | |
const Sort* sort; | |
size_t _length; // the total number of hits | |
CL_NS(util)::CLVector<HitDoc*, CL_NS(util)::Deletor::Object<HitDoc> > hitDocs; // cache of hits retrieved | |
HitDoc* first; // head of LRU cache | |
HitDoc* last; // tail of LRU cache | |
int32_t numDocs; // number cached | |
int32_t maxDocs; // max to cache | |
public: | |
Hits(Searcher* s, Query* q, Filter* f, const Sort* sort=NULL); | |
~Hits(); | |
/** Returns the total number of hits available in this set. */ | |
int32_t length() const; | |
/** Returns the stored fields of the n<sup>th</sup> document in this set. | |
<p>Documents are cached, so that repeated requests for the same element may | |
return the same Document object. | |
* | |
* @memory Memory belongs to the hits object. Don't delete the return value. | |
*/ | |
CL_NS(document)::Document& doc(const int32_t n); | |
/** Returns the id for the nth document in this set. */ | |
int32_t id (const int32_t n); | |
/** Returns the score for the nth document in this set. */ | |
qreal score(const int32_t n); | |
private: | |
// Tries to add new documents to hitDocs. | |
// Ensures that the hit numbered <code>_min</code> has been retrieved. | |
void getMoreDocs(const size_t _min); | |
HitDoc* getHitDoc(const size_t n); | |
void addToFront(HitDoc* hitDoc); | |
void remove(const HitDoc* hitDoc); | |
}; | |
/** The interface for search implementations. | |
* | |
* <p>Implementations provide search over a single index, over multiple | |
* indices, and over indices on remote servers. | |
*/ | |
class Searchable: LUCENE_BASE { | |
public: | |
virtual ~Searchable(){ | |
} | |
/** Lower-level search API. | |
* | |
* <p>{@link HitCollector#collect(int32_t,qreal)} is called for every non-zero | |
* scoring document. | |
* | |
* <p>Applications should only use this if they need <i>all</i> of the | |
* matching documents. The high-level search API ({@link | |
* Searcher#search(Query*)}) is usually more efficient, as it skips | |
* non-high-scoring hits. | |
* | |
* @param query to match documents | |
* @param filter if non-null, a bitset used to eliminate some documents | |
* @param results to receive hits | |
*/ | |
virtual void _search(Query* query, Filter* filter, HitCollector* results) = 0; | |
/** Frees resources associated with this Searcher. | |
* Be careful not to call this method while you are still using objects | |
* like {@link Hits}. | |
*/ | |
virtual void close() = 0; | |
/** Expert: Returns the number of documents containing <code>term</code>. | |
* Called by search code to compute term weights. | |
* @see IndexReader#docFreq(Term). | |
*/ | |
virtual int32_t docFreq(const CL_NS(index)::Term* term) const = 0; | |
/** Expert: Returns one greater than the largest possible document number. | |
* Called by search code to compute term weights. | |
* @see IndexReader#maxDoc(). | |
*/ | |
virtual int32_t maxDoc() const = 0; | |
/** Expert: Low-level search implementation. Finds the top <code>n</code> | |
* hits for <code>query</code>, applying <code>filter</code> if non-null. | |
* | |
* <p>Called by {@link Hits}. | |
* | |
* <p>Applications should usually call {@link Searcher#search(Query*)} or | |
* {@link Searcher#search(Query*,Filter*)} instead. | |
*/ | |
virtual TopDocs* _search(Query* query, Filter* filter, const int32_t n) = 0; | |
/** Expert: Returns the stored fields of document <code>i</code>. | |
* Called by {@link HitCollector} implementations. | |
* @see IndexReader#document(int32_t). | |
*/ | |
virtual bool doc(int32_t i, CL_NS(document)::Document* d) = 0; | |
_CL_DEPRECATED( doc(i, document) ) CL_NS(document)::Document* doc(const int32_t i); | |
/** Expert: called to re-write queries into primitive queries. */ | |
virtual Query* rewrite(Query* query) = 0; | |
/** Returns an Explanation that describes how <code>doc</code> scored against | |
* <code>query</code>. | |
* | |
* <p>This is intended to be used in developing Similarity implementations, | |
* and, for good performance, should not be displayed with every hit. | |
* Computing an explanation is as expensive as executing the query over the | |
* entire index. | |
*/ | |
virtual void explain(Query* query, int32_t doc, Explanation* ret) = 0; | |
/** Expert: Low-level search implementation with arbitrary sorting. Finds | |
* the top <code>n</code> hits for <code>query</code>, applying | |
* <code>filter</code> if non-null, and sorting the hits by the criteria in | |
* <code>sort</code>. | |
* | |
* <p>Applications should usually call {@link | |
* Searcher#search(Query,Filter,Sort)} instead. | |
*/ | |
virtual TopFieldDocs* _search(Query* query, Filter* filter, const int32_t n, const Sort* sort) = 0; | |
}; | |
/** An abstract base class for search implementations. | |
* Implements some common utility methods. | |
*/ | |
class Searcher:public Searchable { | |
private: | |
/** The Similarity implementation used by this searcher. */ | |
Similarity* similarity; | |
public: | |
Searcher(){ | |
similarity = Similarity::getDefault(); | |
} | |
virtual ~Searcher(){ | |
} | |
// Returns the documents matching <code>query</code>. | |
Hits* search(Query* query) { | |
return search(query, (Filter*)NULL ); | |
} | |
// Returns the documents matching <code>query</code> and | |
// <code>filter</code>. | |
Hits* search(Query* query, Filter* filter) { | |
return _CLNEW Hits(this, query, filter); | |
} | |
/** Returns documents matching <code>query</code> sorted by | |
* <code>sort</code>. | |
*/ | |
Hits* search(Query* query, const Sort* sort){ | |
return _CLNEW Hits(this, query, NULL, sort); | |
} | |
/** Returns documents matching <code>query</code> and <code>filter</code>, | |
* sorted by <code>sort</code>. | |
*/ | |
Hits* search(Query* query, Filter* filter, const Sort* sort){ | |
return _CLNEW Hits(this, query, filter, sort); | |
} | |
/** Lower-level search API. | |
* | |
* <p>{@link HitCollector#collect(int32_t ,qreal)} is called for every non-zero | |
* scoring document. | |
* | |
* <p>Applications should only use this if they need <i>all</i> of the | |
* matching documents. The high-level search API ({@link | |
* Searcher#search(Query*)}) is usually more efficient, as it skips | |
* non-high-scoring hits. | |
* <p>Note: The <code>score</code> passed to this method is a raw score. | |
* In other words, the score will not necessarily be a float whose value is | |
* between 0 and 1. | |
*/ | |
void _search(Query* query, HitCollector* results) { | |
Searchable::_search(query, NULL, results); | |
} | |
/** Expert: Set the Similarity implementation used by this Searcher. | |
* | |
* @see Similarity#setDefault(Similarity) | |
*/ | |
void setSimilarity(Similarity* similarity) { | |
this->similarity = similarity; | |
} | |
/** Expert: Return the Similarity implementation used by this Searcher. | |
* | |
* <p>This defaults to the current value of {@link Similarity#getDefault()}. | |
*/ | |
Similarity* getSimilarity(){ | |
return this->similarity; | |
} | |
}; | |
/** The abstract base class for queries. | |
<p>Instantiable subclasses are: | |
<ul> | |
<li> {@link TermQuery} | |
<li> {@link MultiTermQuery} | |
<li> {@link BooleanQuery} | |
<li> {@link WildcardQuery} | |
<li> {@link PhraseQuery} | |
<li> {@link PrefixQuery} | |
<li> {@link PhrasePrefixQuery} | |
<li> {@link FuzzyQuery} | |
<li> {@link RangeQuery} | |
<li> {@link spans.SpanQuery} | |
</ul> | |
<p>A parser for queries is contained in: | |
<ul> | |
<li>{@link queryParser.QueryParser QueryParser} | |
</ul> | |
*/ | |
class Query :LUCENE_BASE { | |
private: | |
// query boost factor | |
qreal boost; | |
protected: | |
Query(const Query& clone); | |
public: | |
Query(); | |
virtual ~Query(); | |
/** Sets the boost for this query clause to <code>b</code>. Documents | |
* matching this clause will (in addition to the normal weightings) have | |
* their score multiplied by <code>b</code>. | |
*/ | |
void setBoost(qreal b); | |
/** Gets the boost for this clause. Documents matching | |
* this clause will (in addition to the normal weightings) have their score | |
* multiplied by <code>b</code>. The boost is 1.0 by default. | |
*/ | |
qreal getBoost() const; | |
/** Expert: Constructs an initializes a Weight for a top-level query. */ | |
Weight* weight(Searcher* searcher); | |
/** Expert: called to re-write queries into primitive queries. */ | |
virtual Query* rewrite(CL_NS(index)::IndexReader* reader); | |
/** Expert: called when re-writing queries under MultiSearcher. | |
* | |
* <p>Only implemented by derived queries, with no | |
* {@link #_createWeight(Searcher)} implementatation. | |
*/ | |
virtual Query* combine(Query** queries); | |
/** Expert: merges the clauses of a set of BooleanQuery's into a single | |
* BooleanQuery. | |
* | |
*<p>A utility for use by {@link #combine(Query[])} implementations. | |
*/ | |
static Query* mergeBooleanQueries(Query** queries); | |
/** Expert: Returns the Similarity implementation to be used for this query. | |
* Subclasses may override this method to specify their own Similarity | |
* implementation, perhaps one that delegates through that of the Searcher. | |
* By default the Searcher's Similarity implementation is returned.*/ | |
Similarity* getSimilarity(Searcher* searcher); | |
/** Returns a clone of this query. */ | |
virtual Query* clone() const = 0; | |
virtual const TCHAR* getQueryName() const = 0; | |
bool instanceOf(const TCHAR* other) const; | |
/** Prints a query to a string, with <code>field</code> as the default field | |
* for terms. <p>The representation used is one that is readable by | |
* {@link queryParser.QueryParser QueryParser} | |
* (although, if the query was created by the parser, the printed | |
* representation may not be exactly what was parsed). | |
*/ | |
virtual TCHAR* toString(const TCHAR* field) const = 0; | |
virtual bool equals(Query* other) const = 0; | |
virtual size_t hashCode() const = 0; | |
/** Prints a query to a string. */ | |
TCHAR* toString() const; | |
/** Expert: Constructs an appropriate Weight implementation for this query. | |
* | |
* <p>Only implemented by primitive queries, which re-write to themselves. | |
* <i>This is an Internal function</i> | |
*/ | |
virtual Weight* _createWeight(Searcher* searcher); | |
}; | |
CL_NS_END | |
#endif |