/*------------------------------------------------------------------------------ | |
* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team | |
* | |
* Distributable under the terms of either the Apache License (Version 2.0) or | |
* the GNU Lesser General Public License, as specified in the COPYING file. | |
------------------------------------------------------------------------------*/ | |
#include "CLucene/StdHeader.h" | |
#include "BooleanQuery.h" | |
#include "BooleanClause.h" | |
#include "CLucene/index/IndexReader.h" | |
#include "CLucene/util/StringBuffer.h" | |
#include "CLucene/util/Arrays.h" | |
#include "SearchHeader.h" | |
#include "BooleanScorer.h" | |
#include "Scorer.h" | |
CL_NS_USE(index) | |
CL_NS_USE(util) | |
CL_NS_DEF(search) | |
BooleanQuery::BooleanQuery(): | |
clauses(true) | |
{ | |
} | |
BooleanQuery::BooleanQuery(const BooleanQuery& clone): | |
Query(clone) | |
{ | |
for ( uint32_t i=0;i<clone.clauses.size();i++ ){ | |
BooleanClause* clause = clone.clauses[i]->clone(); | |
clause->deleteQuery=true; | |
add(clause); | |
} | |
} | |
BooleanQuery::~BooleanQuery(){ | |
clauses.clear(); | |
} | |
size_t BooleanQuery::hashCode() const { | |
//todo: do cachedHashCode, and invalidate on add/remove clause | |
size_t ret = 0; | |
for (uint32_t i = 0 ; i < clauses.size(); i++) { | |
BooleanClause* c = clauses[i]; | |
ret = 31 * ret + c->hashCode(); | |
} | |
ret = ret ^ Similarity::floatToByte(getBoost()); | |
return ret; | |
} | |
const TCHAR* BooleanQuery::getQueryName() const{ | |
return getClassName(); | |
} | |
const TCHAR* BooleanQuery::getClassName(){ | |
return _T("BooleanQuery"); | |
} | |
/** | |
* Default value is 1024. Use <code>org.apache.lucene.maxClauseCount</code> | |
* system property to override. | |
*/ | |
size_t BooleanQuery::maxClauseCount = LUCENE_BOOLEANQUERY_MAXCLAUSECOUNT; | |
size_t BooleanQuery::getMaxClauseCount(){ | |
return maxClauseCount; | |
} | |
void BooleanQuery::setMaxClauseCount(size_t maxClauseCount){ | |
BooleanQuery::maxClauseCount = maxClauseCount; | |
} | |
void BooleanQuery::add(Query* query, const bool deleteQuery, const bool required, const bool prohibited) { | |
BooleanClause* bc = _CLNEW BooleanClause(query,deleteQuery,required, prohibited); | |
try{ | |
add(bc); | |
}catch(...){ | |
_CLDELETE(bc); | |
throw; | |
} | |
} | |
void BooleanQuery::add(BooleanClause* clause) { | |
if (clauses.size() >= getMaxClauseCount()) | |
_CLTHROWA(CL_ERR_TooManyClauses,"Too Many Clauses"); | |
clauses.push_back(clause); | |
} | |
size_t BooleanQuery::getClauseCount() const { | |
return (int32_t) clauses.size(); | |
} | |
TCHAR* BooleanQuery::toString(const TCHAR* field) const{ | |
StringBuffer buffer; | |
if (getBoost() != 1.0) { | |
buffer.append(_T("(")); | |
} | |
for (uint32_t i = 0 ; i < clauses.size(); i++) { | |
BooleanClause* c = clauses[i]; | |
if (c->prohibited) | |
buffer.append(_T("-")); | |
else if (c->required) | |
buffer.append(_T("+")); | |
if ( c->query->instanceOf(BooleanQuery::getClassName()) ) { // wrap sub-bools in parens | |
buffer.append(_T("(")); | |
TCHAR* buf = c->query->toString(field); | |
buffer.append(buf); | |
_CLDELETE_CARRAY( buf ); | |
buffer.append(_T(")")); | |
} else { | |
TCHAR* buf = c->query->toString(field); | |
buffer.append(buf); | |
_CLDELETE_CARRAY( buf ); | |
} | |
if (i != clauses.size()-1) | |
buffer.append(_T(" ")); | |
if (getBoost() != 1.0) { | |
buffer.append(_T(")^")); | |
buffer.appendFloat(getBoost(),1); | |
} | |
} | |
return buffer.toString(); | |
} | |
BooleanClause** BooleanQuery::getClauses() const | |
{ | |
CND_MESSAGE(false, "Warning: BooleanQuery::getClauses() is deprecated") | |
BooleanClause** ret = _CL_NEWARRAY(BooleanClause*, clauses.size()+1); | |
getClauses(ret); | |
return ret; | |
} | |
void BooleanQuery::getClauses(BooleanClause** ret) const | |
{ | |
size_t size=clauses.size(); | |
for ( uint32_t i=0;i<size;i++ ) | |
ret[i] = clauses[i]; | |
} | |
Query* BooleanQuery::rewrite(IndexReader* reader) { | |
if (clauses.size() == 1) { // optimize 1-clause queries | |
BooleanClause* c = clauses[0]; | |
if (!c->prohibited) { // just return clause | |
Query* query = c->query->rewrite(reader); // rewrite first | |
//if the query doesn't actually get re-written, | |
//then return a clone (because the BooleanQuery | |
//will register different to the returned query. | |
if ( query == c->query ) | |
query = query->clone(); | |
if (getBoost() != 1.0f) { // incorporate boost | |
query->setBoost(getBoost() * query->getBoost()); | |
} | |
return query; | |
} | |
} | |
BooleanQuery* clone = NULL; // recursively rewrite | |
for (uint32_t i = 0 ; i < clauses.size(); i++) { | |
BooleanClause* c = clauses[i]; | |
Query* query = c->query->rewrite(reader); | |
if (query != c->query) { // clause rewrote: must clone | |
if (clone == NULL) | |
clone = (BooleanQuery*)this->clone(); | |
//todo: check if delete query should be on... | |
//in fact we should try and get rid of these | |
//for compatibility sake | |
clone->clauses.set (i, _CLNEW BooleanClause(query, true, c->required, c->prohibited)); | |
} | |
} | |
if (clone != NULL) { | |
return clone; // some clauses rewrote | |
} else | |
return this; // no clauses rewrote | |
} | |
Query* BooleanQuery::clone() const{ | |
BooleanQuery* clone = _CLNEW BooleanQuery(*this); | |
return clone; | |
} | |
/** Returns true iff <code>o</code> is equal to this. */ | |
bool BooleanQuery::equals(Query* o)const { | |
if (!(o->instanceOf(BooleanQuery::getClassName()))) | |
return false; | |
const BooleanQuery* other = (BooleanQuery*)o; | |
bool ret = (this->getBoost() == other->getBoost()); | |
if ( ret ){ | |
CLListEquals<BooleanClause,BooleanClause::Compare, const ClausesType, const ClausesType> comp; | |
ret = comp.equals(&this->clauses,&other->clauses); | |
} | |
return ret; | |
} | |
qreal BooleanQuery::BooleanWeight::getValue() { return parentQuery->getBoost(); } | |
Query* BooleanQuery::BooleanWeight::getQuery() { return (Query*)parentQuery; } | |
BooleanQuery::BooleanWeight::BooleanWeight(Searcher* searcher, | |
CLVector<BooleanClause*,Deletor::Object<BooleanClause> >* clauses, BooleanQuery* parentQuery) | |
{ | |
this->searcher = searcher; | |
this->parentQuery = parentQuery; | |
this->clauses = clauses; | |
for (uint32_t i = 0 ; i < clauses->size(); i++) { | |
weights.push_back((*clauses)[i]->query->_createWeight(searcher)); | |
} | |
} | |
BooleanQuery::BooleanWeight::~BooleanWeight(){ | |
this->weights.clear(); | |
} | |
qreal BooleanQuery::BooleanWeight::sumOfSquaredWeights() { | |
qreal sum = 0.0f; | |
for (uint32_t i = 0 ; i < weights.size(); i++) { | |
BooleanClause* c = (*clauses)[i]; | |
Weight* w = weights[i]; | |
if (!c->prohibited) | |
sum += w->sumOfSquaredWeights(); // sum sub weights | |
} | |
sum *= parentQuery->getBoost() * parentQuery->getBoost(); // boost each sub-weight | |
return sum ; | |
} | |
void BooleanQuery::BooleanWeight::normalize(qreal norm) { | |
norm *= parentQuery->getBoost(); // incorporate boost | |
for (uint32_t i = 0 ; i < weights.size(); i++) { | |
BooleanClause* c = (*clauses)[i]; | |
Weight* w = weights[i]; | |
if (!c->prohibited) | |
w->normalize(norm); | |
} | |
} | |
Scorer* BooleanQuery::BooleanWeight::scorer(IndexReader* reader){ | |
// First see if the (faster) ConjunctionScorer will work. This can be | |
// used when all clauses are required. Also, at this point a | |
// BooleanScorer cannot be embedded in a ConjunctionScorer, as the hits | |
// from a BooleanScorer are not always sorted by document number (sigh) | |
// and hence BooleanScorer cannot implement skipTo() correctly, which is | |
// required by ConjunctionScorer. | |
bool allRequired = true; | |
bool noneBoolean = true; | |
{ //msvc6 scope fix | |
for (uint32_t i = 0 ; i < weights.size(); i++) { | |
BooleanClause* c = (*clauses)[i]; | |
if (!c->required) | |
allRequired = false; | |
if (c->query->instanceOf(BooleanQuery::getClassName())) | |
noneBoolean = false; | |
} | |
} | |
if (allRequired && noneBoolean) { // ConjunctionScorer is okay | |
ConjunctionScorer* result = | |
_CLNEW ConjunctionScorer(parentQuery->getSimilarity(searcher)); | |
for (uint32_t i = 0 ; i < weights.size(); i++) { | |
Weight* w = weights[i]; | |
Scorer* subScorer = w->scorer(reader); | |
if (subScorer == NULL) | |
return NULL; | |
result->add(subScorer); | |
} | |
return result; | |
} | |
// Use good-old BooleanScorer instead. | |
BooleanScorer* result = _CLNEW BooleanScorer(parentQuery->getSimilarity(searcher)); | |
{ //msvc6 scope fix | |
for (uint32_t i = 0 ; i < weights.size(); i++) { | |
BooleanClause* c = (*clauses)[i]; | |
Weight* w = weights[i]; | |
Scorer* subScorer = w->scorer(reader); | |
if (subScorer != NULL) | |
result->add(subScorer, c->required, c->prohibited); | |
else if (c->required) | |
return NULL; | |
} | |
} | |
return result; | |
} | |
void BooleanQuery::BooleanWeight::explain(IndexReader* reader, int32_t doc, Explanation* result){ | |
int32_t coord = 0; | |
int32_t maxCoord = 0; | |
qreal sum = 0.0f; | |
Explanation* sumExpl = _CLNEW Explanation; | |
for (uint32_t i = 0 ; i < weights.size(); i++) { | |
BooleanClause* c = (*clauses)[i]; | |
Weight* w = weights[i]; | |
Explanation* e = _CLNEW Explanation; | |
w->explain(reader, doc, e); | |
if (!c->prohibited) | |
maxCoord++; | |
if (e->getValue() > 0) { | |
if (!c->prohibited) { | |
sumExpl->addDetail(e); | |
sum += e->getValue(); | |
coord++; | |
e = NULL; //prevent e from being deleted | |
} else { | |
//we want to return something else... | |
_CLDELETE(sumExpl); | |
result->setValue(0.0f); | |
result->setDescription(_T("match prohibited")); | |
return; | |
} | |
} else if (c->required) { | |
_CLDELETE(sumExpl); | |
result->setValue(0.0f); | |
result->setDescription(_T("match prohibited")); | |
return; | |
} | |
_CLDELETE(e); | |
} | |
sumExpl->setValue(sum); | |
if (coord == 1){ // only one clause matched | |
Explanation* tmp = sumExpl; | |
sumExpl = sumExpl->getDetail(0)->clone(); // eliminate wrapper | |
_CLDELETE(tmp); | |
} | |
sumExpl->setDescription(_T("sum of:")); | |
qreal coordFactor = parentQuery->getSimilarity(searcher)->coord(coord, maxCoord); | |
if (coordFactor == 1.0f){ // coord is no-op | |
result->set(*sumExpl); // eliminate wrapper | |
_CLDELETE(sumExpl); | |
} else { | |
result->setDescription( _T("product of:")); | |
result->addDetail(sumExpl); | |
StringBuffer explbuf; | |
explbuf.append(_T("coord(")); | |
explbuf.appendInt(coord); | |
explbuf.append(_T("/")); | |
explbuf.appendInt(maxCoord); | |
explbuf.append(_T(")")); | |
result->addDetail(_CLNEW Explanation(coordFactor, explbuf.getBuffer())); | |
result->setValue(sum*coordFactor); | |
} | |
} | |
CL_NS_END |