| /**************************************************************************** |
| ** |
| ** Copyright (C) 2011 Nokia Corporation and/or its subsidiary(-ies). |
| ** All rights reserved. |
| ** Contact: Nokia Corporation (qt-info@nokia.com) |
| ** |
| ** This file is part of the Qt Assistant of the Qt Toolkit. |
| ** |
| ** $QT_BEGIN_LICENSE:LGPL$ |
| ** GNU Lesser General Public License Usage |
| ** This file may be used under the terms of the GNU Lesser General Public |
| ** License version 2.1 as published by the Free Software Foundation and |
| ** appearing in the file LICENSE.LGPL included in the packaging of this |
| ** file. Please review the following information to ensure the GNU Lesser |
| ** General Public License version 2.1 requirements will be met: |
| ** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. |
| ** |
| ** In addition, as a special exception, Nokia gives you certain additional |
| ** rights. These rights are described in the Nokia Qt LGPL Exception |
| ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package. |
| ** |
| ** GNU General Public License Usage |
| ** Alternatively, this file may be used under the terms of the GNU General |
| ** Public License version 3.0 as published by the Free Software Foundation |
| ** and appearing in the file LICENSE.GPL included in the packaging of this |
| ** file. Please review the following information to ensure the GNU General |
| ** Public License version 3.0 requirements will be met: |
| ** http://www.gnu.org/copyleft/gpl.html. |
| ** |
| ** Other Usage |
| ** Alternatively, this file may be used in accordance with the terms and |
| ** conditions contained in a signed written agreement between you and Nokia. |
| ** |
| ** |
| ** |
| ** |
| ** |
| ** $QT_END_LICENSE$ |
| ** |
| ****************************************************************************/ |
| |
| #include "qhelpenginecore.h" |
| #include "qhelpsearchindexreader_default_p.h" |
| |
| #include <QtCore/QDir> |
| #include <QtCore/QUrl> |
| #include <QtCore/QFile> |
| #include <QtCore/QVariant> |
| #include <QtCore/QFileInfo> |
| #include <QtCore/QDataStream> |
| #include <QtCore/QTextStream> |
| |
| QT_BEGIN_NAMESPACE |
| |
| namespace fulltextsearch { |
| namespace std { |
| |
| namespace { |
| QStringList split( const QString &str ) |
| { |
| QStringList lst; |
| int j = 0; |
| int i = str.indexOf(QLatin1Char('*'), j ); |
| |
| if (str.startsWith(QLatin1String("*"))) |
| lst << QLatin1String("*"); |
| |
| while ( i != -1 ) { |
| if ( i > j && i <= (int)str.length() ) { |
| lst << str.mid( j, i - j ); |
| lst << QLatin1String("*"); |
| } |
| j = i + 1; |
| i = str.indexOf(QLatin1Char('*'), j ); |
| } |
| |
| int l = str.length() - 1; |
| if ( str.mid( j, l - j + 1 ).length() > 0 ) |
| lst << str.mid( j, l - j + 1 ); |
| |
| return lst; |
| } |
| } |
| |
| |
| Reader::Reader() |
| : indexPath(QString()) |
| , indexFile(QString()) |
| , documentFile(QString()) |
| { |
| termList.clear(); |
| indexTable.clear(); |
| searchIndexTable.clear(); |
| } |
| |
| Reader::~Reader() |
| { |
| reset(); |
| searchIndexTable.clear(); |
| } |
| |
| bool Reader::readIndex() |
| { |
| if (indexTable.contains(indexFile)) |
| return true; |
| |
| QFile idxFile(indexFile); |
| if (!idxFile.open(QFile::ReadOnly)) |
| return false; |
| |
| QString key; |
| int numOfDocs; |
| EntryTable entryTable; |
| QVector<Document> docs; |
| QDataStream dictStream(&idxFile); |
| while (!dictStream.atEnd()) { |
| dictStream >> key; |
| dictStream >> numOfDocs; |
| docs.resize(numOfDocs); |
| dictStream >> docs; |
| entryTable.insert(key, new Entry(docs)); |
| } |
| idxFile.close(); |
| |
| if (entryTable.isEmpty()) |
| return false; |
| |
| QFile docFile(documentFile); |
| if (!docFile.open(QFile::ReadOnly)) |
| return false; |
| |
| QString title, url; |
| DocumentList documentList; |
| QDataStream docStream(&docFile); |
| while (!docStream.atEnd()) { |
| docStream >> title; |
| docStream >> url; |
| documentList.append(QStringList(title) << url); |
| } |
| docFile.close(); |
| |
| if (documentList.isEmpty()) { |
| cleanupIndex(entryTable); |
| return false; |
| } |
| |
| indexTable.insert(indexFile, Index(entryTable, documentList)); |
| return true; |
| } |
| |
| bool Reader::initCheck() const |
| { |
| return !searchIndexTable.isEmpty(); |
| } |
| |
| void Reader::setIndexPath(const QString &path) |
| { |
| indexPath = path; |
| } |
| |
| void Reader::filterFilesForAttributes(const QStringList &attributes) |
| { |
| searchIndexTable.clear(); |
| for(IndexTable::ConstIterator it = indexTable.begin(); it != indexTable.end(); ++it) { |
| const QString fileName = it.key(); |
| bool containsAll = true; |
| QStringList split = fileName.split(QLatin1String("@")); |
| foreach (const QString &attribute, attributes) { |
| if (!split.contains(attribute, Qt::CaseInsensitive)) { |
| containsAll = false; |
| break; |
| } |
| } |
| |
| if (containsAll) |
| searchIndexTable.insert(fileName, it.value()); |
| } |
| } |
| |
| void Reader::setIndexFile(const QString &namespaceName, const QString &attributes) |
| { |
| QString extension = namespaceName + QLatin1String("@") + attributes; |
| indexFile = indexPath + QLatin1String("/indexdb40.") + extension; |
| documentFile = indexPath + QLatin1String("/indexdoc40.") + extension; |
| } |
| |
| bool Reader::splitSearchTerm(const QString &searchTerm, QStringList *terms, |
| QStringList *termSeq, QStringList *seqWords) |
| { |
| QString term = searchTerm; |
| |
| term = term.simplified(); |
| term = term.replace(QLatin1String("\'"), QLatin1String("\"")); |
| term = term.replace(QLatin1String("`"), QLatin1String("\"")); |
| term = term.replace(QLatin1String("-"), QLatin1String(" ")); |
| term = term.replace(QRegExp(QLatin1String("\\s[\\S]?\\s")), QLatin1String(" ")); |
| |
| *terms = term.split(QLatin1Char(' ')); |
| QStringList::iterator it = terms->begin(); |
| for (; it != terms->end(); ++it) { |
| (*it) = (*it).simplified(); |
| (*it) = (*it).toLower(); |
| (*it) = (*it).replace(QLatin1String("\""), QLatin1String("")); |
| } |
| |
| if (term.contains(QLatin1Char('\"'))) { |
| if ((term.count(QLatin1Char('\"')))%2 == 0) { |
| int beg = 0; |
| int end = 0; |
| QString s; |
| beg = term.indexOf(QLatin1Char('\"'), beg); |
| while (beg != -1) { |
| beg++; |
| end = term.indexOf(QLatin1Char('\"'), beg); |
| s = term.mid(beg, end - beg); |
| s = s.toLower(); |
| s = s.simplified(); |
| if (s.contains(QLatin1Char('*'))) { |
| qWarning("Full Text Search, using a wildcard within phrases is not allowed."); |
| return false; |
| } |
| *seqWords += s.split(QLatin1Char(' ')); |
| *termSeq << s; |
| beg = term.indexOf(QLatin1Char('\"'), end + 1); |
| } |
| } else { |
| qWarning("Full Text Search, the closing quotation mark is missing."); |
| return false; |
| } |
| } |
| |
| return true; |
| } |
| |
| void Reader::searchInIndex(const QStringList &terms) |
| { |
| foreach (const QString &term, terms) { |
| QVector<Document> documents; |
| |
| for(IndexTable::ConstIterator it = searchIndexTable.begin(); |
| it != searchIndexTable.end(); ++it) { |
| EntryTable entryTable = it.value().first; |
| DocumentList documentList = it.value().second; |
| |
| if (term.contains(QLatin1Char('*'))) |
| documents = setupDummyTerm(getWildcardTerms(term, entryTable), entryTable); |
| else if (entryTable.value(term)) |
| documents = entryTable.value(term)->documents; |
| else |
| continue; |
| |
| if (!documents.isEmpty()) { |
| DocumentInfo info; |
| QString title, url; |
| QVector<DocumentInfo> documentsInfo; |
| foreach(const Document &doc, documents) { |
| info.docNumber = doc.docNumber; |
| info.frequency = doc.frequency; |
| info.documentUrl = documentList.at(doc.docNumber).at(1); |
| info.documentTitle = documentList.at(doc.docNumber).at(0); |
| documentsInfo.append(info); |
| } |
| |
| bool found = false; |
| for(QList<TermInfo>::Iterator tit = termList.begin(); |
| tit != termList.end(); ++tit) { |
| TermInfo *t = &(*tit); |
| if(t->term == term) { |
| t->documents += documentsInfo; |
| t->frequency += documentsInfo.count(); |
| found = true; break; |
| } |
| } |
| if (!found) |
| termList.append(TermInfo(term, documentsInfo.count(), documentsInfo)); |
| } |
| } |
| } |
| qSort(termList); |
| } |
| |
| QVector<DocumentInfo> Reader::hits() |
| { |
| QVector<DocumentInfo> documents; |
| if (!termList.count()) |
| return documents; |
| |
| documents = termList.takeFirst().documents; |
| for(QList<TermInfo>::Iterator it = termList.begin(); it != termList.end(); ++it) { |
| TermInfo *t = &(*it); |
| QVector<DocumentInfo> docs = t->documents; |
| for(QVector<DocumentInfo>::Iterator minDoc_it = documents.begin(); |
| minDoc_it != documents.end(); ) { |
| bool found = false; |
| for (QVector<DocumentInfo>::ConstIterator doc_it = docs.constBegin(); |
| doc_it != docs.constEnd(); ++doc_it ) { |
| if ( (*minDoc_it).docNumber == (*doc_it).docNumber ) { |
| (*minDoc_it).frequency += (*doc_it).frequency; |
| found = true; |
| break; |
| } |
| } |
| if (!found) |
| minDoc_it = documents.erase(minDoc_it); |
| else |
| ++minDoc_it; |
| } |
| } |
| |
| qSort(documents); |
| return documents; |
| } |
| |
| bool Reader::searchForPattern(const QStringList &patterns, const QStringList &words, |
| const QByteArray &data) |
| { |
| if (data.isEmpty()) |
| return false; |
| |
| for(QHash<QString, PosEntry*>::ConstIterator mit = |
| miniIndex.begin(); mit != miniIndex.end(); ++mit) { |
| delete mit.value(); |
| } |
| miniIndex.clear(); |
| |
| wordNum = 3; |
| QStringList::ConstIterator cIt = words.begin(); |
| for ( ; cIt != words.end(); ++cIt ) |
| miniIndex.insert(*cIt, new PosEntry(0)); |
| |
| QTextStream s(data); |
| QString text = s.readAll(); |
| bool valid = true; |
| const QChar *buf = text.unicode(); |
| QChar str[64]; |
| QChar c = buf[0]; |
| int j = 0; |
| int i = 0; |
| while ( j < text.length() ) { |
| if ( c == QLatin1Char('<') || c == QLatin1Char('&') ) { |
| valid = false; |
| if ( i > 1 ) |
| buildMiniIndex( QString(str,i) ); |
| i = 0; |
| c = buf[++j]; |
| continue; |
| } |
| if ( ( c == QLatin1Char('>') || c == QLatin1Char(';') ) && !valid ) { |
| valid = true; |
| c = buf[++j]; |
| continue; |
| } |
| if ( !valid ) { |
| c = buf[++j]; |
| continue; |
| } |
| if ( ( c.isLetterOrNumber() || c == QLatin1Char('_') ) && i < 63 ) { |
| str[i] = c.toLower(); |
| ++i; |
| } else { |
| if ( i > 1 ) |
| buildMiniIndex( QString(str,i) ); |
| i = 0; |
| } |
| c = buf[++j]; |
| } |
| if ( i > 1 ) |
| buildMiniIndex( QString(str,i) ); |
| |
| QStringList::ConstIterator patIt = patterns.begin(); |
| QStringList wordLst; |
| QList<uint> a, b; |
| QList<uint>::iterator aIt; |
| for ( ; patIt != patterns.end(); ++patIt ) { |
| wordLst = (*patIt).split(QLatin1Char(' ')); |
| a = miniIndex[ wordLst[0] ]->positions; |
| for ( int j = 1; j < (int)wordLst.count(); ++j ) { |
| b = miniIndex[ wordLst[j] ]->positions; |
| aIt = a.begin(); |
| while ( aIt != a.end() ) { |
| if ( b.contains( *aIt + 1 )) { |
| (*aIt)++; |
| ++aIt; |
| } else { |
| aIt = a.erase( aIt ); |
| } |
| } |
| } |
| } |
| if ( a.count() ) |
| return true; |
| return false; |
| } |
| |
| QVector<Document> Reader::setupDummyTerm(const QStringList &terms, |
| const EntryTable &entryTable) |
| { |
| QList<Term> termList; |
| for (QStringList::ConstIterator it = terms.begin(); it != terms.end(); ++it) { |
| if (entryTable.value(*it)) { |
| Entry *e = entryTable.value(*it); |
| termList.append(Term(*it, e->documents.count(), e->documents ) ); |
| } |
| } |
| QVector<Document> maxList(0); |
| if ( !termList.count() ) |
| return maxList; |
| qSort(termList); |
| |
| maxList = termList.takeLast().documents; |
| for(QList<Term>::Iterator it = termList.begin(); it != termList.end(); ++it) { |
| Term *t = &(*it); |
| QVector<Document> docs = t->documents; |
| for (QVector<Document>::iterator docIt = docs.begin(); docIt != docs.end(); ++docIt ) { |
| if ( maxList.indexOf( *docIt ) == -1 ) |
| maxList.append( *docIt ); |
| } |
| } |
| return maxList; |
| } |
| |
| QStringList Reader::getWildcardTerms(const QString &term, |
| const EntryTable &entryTable) |
| { |
| QStringList lst; |
| QStringList terms = split(term); |
| QStringList::Iterator iter; |
| |
| for(EntryTable::ConstIterator it = entryTable.begin(); |
| it != entryTable.end(); ++it) { |
| int index = 0; |
| bool found = false; |
| QString text( it.key() ); |
| for ( iter = terms.begin(); iter != terms.end(); ++iter ) { |
| if ( *iter == QLatin1String("*") ) { |
| found = true; |
| continue; |
| } |
| if ( iter == terms.begin() && (*iter)[0] != text[0] ) { |
| found = false; |
| break; |
| } |
| index = text.indexOf( *iter, index ); |
| if ( *iter == terms.last() && index != (int)text.length()-1 ) { |
| index = text.lastIndexOf( *iter ); |
| if ( index != (int)text.length() - (int)(*iter).length() ) { |
| found = false; |
| break; |
| } |
| } |
| if ( index != -1 ) { |
| found = true; |
| index += (*iter).length(); |
| continue; |
| } else { |
| found = false; |
| break; |
| } |
| } |
| if (found) |
| lst << text; |
| } |
| |
| return lst; |
| } |
| |
| void Reader::buildMiniIndex(const QString &string) |
| { |
| if (miniIndex[string]) |
| miniIndex[string]->positions.append(wordNum); |
| ++wordNum; |
| } |
| |
| void Reader::reset() |
| { |
| for(IndexTable::Iterator it = indexTable.begin(); |
| it != indexTable.end(); ++it) { |
| cleanupIndex(it.value().first); |
| it.value().second.clear(); |
| } |
| } |
| |
| void Reader::cleanupIndex(EntryTable &entryTable) |
| { |
| for(EntryTable::ConstIterator it = |
| entryTable.begin(); it != entryTable.end(); ++it) { |
| delete it.value(); |
| } |
| |
| entryTable.clear(); |
| } |
| |
| |
| QHelpSearchIndexReaderDefault::QHelpSearchIndexReaderDefault() |
| : QHelpSearchIndexReader() |
| { |
| // nothing todo |
| } |
| |
| QHelpSearchIndexReaderDefault::~QHelpSearchIndexReaderDefault() |
| { |
| } |
| |
| void QHelpSearchIndexReaderDefault::run() |
| { |
| mutex.lock(); |
| |
| if (m_cancel) { |
| mutex.unlock(); |
| return; |
| } |
| |
| const QList<QHelpSearchQuery> &queryList = this->m_query; |
| const QLatin1String key("DefaultSearchNamespaces"); |
| const QString collectionFile(this->m_collectionFile); |
| const QString indexPath = m_indexFilesFolder; |
| |
| mutex.unlock(); |
| |
| QString queryTerm; |
| foreach (const QHelpSearchQuery &query, queryList) { |
| if (query.fieldName == QHelpSearchQuery::DEFAULT) { |
| queryTerm = query.wordList.at(0); |
| break; |
| } |
| } |
| |
| if (queryTerm.isEmpty()) |
| return; |
| |
| QHelpEngineCore engine(collectionFile, 0); |
| if (!engine.setupData()) |
| return; |
| |
| const QStringList registeredDocs = engine.registeredDocumentations(); |
| const QStringList indexedNamespaces = engine.customValue(key).toString(). |
| split(QLatin1String("|"), QString::SkipEmptyParts); |
| |
| emit searchingStarted(); |
| |
| // setup the reader |
| m_reader.setIndexPath(indexPath); |
| foreach(const QString &namespaceName, registeredDocs) { |
| mutex.lock(); |
| if (m_cancel) { |
| mutex.unlock(); |
| searchingFinished(0); // TODO: check this ??? |
| return; |
| } |
| mutex.unlock(); |
| |
| const QList<QStringList> attributeSets = |
| engine.filterAttributeSets(namespaceName); |
| |
| foreach (const QStringList &attributes, attributeSets) { |
| // read all index files |
| m_reader.setIndexFile(namespaceName, attributes.join(QLatin1String("@"))); |
| if (!m_reader.readIndex()) { |
| qWarning("Full Text Search, could not read file for namespace: %s.", |
| namespaceName.toUtf8().constData()); |
| } |
| } |
| } |
| |
| // get the current filter attributes and minimize the index files table |
| m_reader.filterFilesForAttributes(engine.filterAttributes(engine.currentFilter())); |
| |
| hitList.clear(); |
| QStringList terms, termSeq, seqWords; |
| if (m_reader.initCheck() && // check if we could read anything |
| m_reader.splitSearchTerm(queryTerm, &terms, &termSeq, &seqWords) ) { |
| |
| // search for term(s) |
| m_reader.searchInIndex(terms); // TODO: should this be interruptible as well ??? |
| |
| QVector<DocumentInfo> hits = m_reader.hits(); |
| if (!hits.isEmpty()) { |
| if (termSeq.isEmpty()) { |
| foreach (const DocumentInfo &docInfo, hits) { |
| mutex.lock(); |
| if (m_cancel) { |
| mutex.unlock(); |
| searchingFinished(0); // TODO: check this, speed issue while locking??? |
| return; |
| } |
| mutex.unlock(); |
| hitList.append(qMakePair(docInfo.documentTitle, docInfo.documentUrl)); |
| } |
| } else { |
| foreach (const DocumentInfo &docInfo, hits) { |
| mutex.lock(); |
| if (m_cancel) { |
| mutex.unlock(); |
| searchingFinished(0); // TODO: check this, speed issue while locking??? |
| return; |
| } |
| mutex.unlock(); |
| |
| if (m_reader.searchForPattern(termSeq, seqWords, engine.fileData(docInfo.documentUrl))) // TODO: should this be interruptible as well ??? |
| hitList.append(qMakePair(docInfo.documentTitle, docInfo.documentUrl)); |
| } |
| } |
| } |
| } |
| |
| emit searchingFinished(hitList.count()); |
| } |
| |
| } // namespace std |
| } // namespace fulltextsearch |
| |
| QT_END_NAMESPACE |