qhelpsearchindexwriter_default.cpp 10.88 KiB
/****************************************************************************
**
** Copyright (C) 2014 Digia Plc and/or its subsidiary(-ies).
** Contact: http://www.qt-project.org/legal
**
** This file is part of the Qt Assistant of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL21$
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and Digia. For licensing terms and
** conditions see http://qt.digia.com/licensing. For further information
** use the contact form at http://qt.digia.com/contact-us.
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 2.1 or version 3 as published by the Free
** Software Foundation and appearing in the file LICENSE.LGPLv21 and
** LICENSE.LGPLv3 included in the packaging of this file. Please review the
** following information to ensure the GNU Lesser General Public License
** requirements will be met: https://www.gnu.org/licenses/lgpl.html and
** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
** In addition, as a special exception, Digia gives you certain additional
** rights. These rights are described in the Digia Qt LGPL Exception
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
** $QT_END_LICENSE$
****************************************************************************/
#include "qhelpsearchindexwriter_default_p.h"
#include "qhelp_global.h"
#include "qhelpenginecore.h"
#include <QtCore/QDir>
#include <QtCore/QSet>
#include <QtCore/QUrl>
#include <QtCore/QFile>
#include <QtCore/QRegExp>
#include <QtCore/QVariant>
#include <QtCore/QFileInfo>
#include <QtCore/QTextCodec>
#include <QtCore/QTextStream>
QT_BEGIN_NAMESPACE
namespace fulltextsearch {
namespace std {
// Creates a writer whose index files will live below \a path.
// The concrete file names stay empty until setIndexFile() is called.
Writer::Writer(const QString &path)
    : indexPath(path)
    , indexFile(QString())
    , documentFile(QString())
{
    // nothing todo
}

// Releases every Entry still owned by the in-memory index.
Writer::~Writer()
{
    reset();
}

void Writer::reset()
    for(QHash<QString, Entry*>::ConstIterator it =
        index.begin(); it != index.end(); ++it) {
            delete it.value();
7172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
} index.clear(); documentList.clear(); } bool Writer::writeIndex() const { bool status; QFile idxFile(indexFile); if (!(status = idxFile.open(QFile::WriteOnly))) return status; QDataStream indexStream(&idxFile); for(QHash<QString, Entry*>::ConstIterator it = index.begin(); it != index.end(); ++it) { indexStream << it.key(); indexStream << it.value()->documents.count(); indexStream << it.value()->documents; } idxFile.close(); QFile docFile(documentFile); if (!(status = docFile.open(QFile::WriteOnly))) return status; QDataStream docStream(&docFile); foreach(const QStringList &list, documentList) { docStream << list.at(0); docStream << list.at(1); } docFile.close(); return status; } void Writer::removeIndex() const { QFile idxFile(indexFile); if (idxFile.exists()) idxFile.remove(); QFile docFile(documentFile); if (docFile.exists()) docFile.remove(); } void Writer::setIndexFile(const QString &namespaceName, const QString &attributes) { QString extension = namespaceName + QLatin1String("@") + attributes; indexFile = indexPath + QLatin1String("/indexdb40.") + extension; documentFile = indexPath + QLatin1String("/indexdoc40.") + extension; } void Writer::insertInIndex(const QString &string, int docNum) { if (string == QLatin1String("amp") || string == QLatin1String("nbsp")) return; Entry *entry = 0; if (index.count()) entry = index[string]; if (entry) { if (entry->documents.last().docNumber != docNum) entry->documents.append(Document(docNum, 1)); else entry->documents.last().frequency++; } else { index.insert(string, new Entry(docNum));
141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210
} } void Writer::insertInDocumentList(const QString &title, const QString &url) { documentList.append(QStringList(title) << url); } QHelpSearchIndexWriter::QHelpSearchIndexWriter() : QThread() , m_cancel(false) { // nothing todo } QHelpSearchIndexWriter::~QHelpSearchIndexWriter() { mutex.lock(); this->m_cancel = true; waitCondition.wakeOne(); mutex.unlock(); wait(); } void QHelpSearchIndexWriter::cancelIndexing() { mutex.lock(); this->m_cancel = true; mutex.unlock(); } void QHelpSearchIndexWriter::updateIndex(const QString &collectionFile, const QString &indexFilesFolder, bool reindex) { wait(); QMutexLocker lock(&mutex); this->m_cancel = false; this->m_reindex = reindex; this->m_collectionFile = collectionFile; this->m_indexFilesFolder = indexFilesFolder; start(QThread::LowestPriority); } void QHelpSearchIndexWriter::run() { mutex.lock(); if (m_cancel) { mutex.unlock(); return; } const bool reindex(this->m_reindex); const QLatin1String key("DefaultSearchNamespaces"); const QString collectionFile(this->m_collectionFile); const QString indexPath = m_indexFilesFolder; mutex.unlock(); QHelpEngineCore engine(collectionFile, 0); if (!engine.setupData()) return; if (reindex) engine.setCustomValue(key, QLatin1String(""));
211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280
const QStringList registeredDocs = engine.registeredDocumentations(); const QStringList indexedNamespaces = engine.customValue(key).toString(). split(QLatin1String("|"), QString::SkipEmptyParts); emit indexingStarted(); QStringList namespaces; Writer writer(indexPath); foreach(const QString &namespaceName, registeredDocs) { mutex.lock(); if (m_cancel) { mutex.unlock(); return; } mutex.unlock(); // if indexed, continue namespaces.append(namespaceName); if (indexedNamespaces.contains(namespaceName)) continue; const QList<QStringList> attributeSets = engine.filterAttributeSets(namespaceName); foreach (const QStringList &attributes, attributeSets) { // cleanup maybe old or unfinished files writer.setIndexFile(namespaceName, attributes.join(QLatin1String("@"))); writer.removeIndex(); QSet<QString> documentsSet; const QList<QUrl> docFiles = engine.files(namespaceName, attributes); foreach(QUrl url, docFiles) { if (m_cancel) return; // get rid of duplicated files if (url.hasFragment()) url.setFragment(QString()); QString s = url.toString(); if (s.endsWith(QLatin1String(".html")) || s.endsWith(QLatin1String(".htm")) || s.endsWith(QLatin1String(".txt"))) documentsSet.insert(s); } int docNum = 0; const QStringList documentsList(documentsSet.toList()); foreach(const QString &url, documentsList) { if (m_cancel) return; QByteArray data(engine.fileData(url)); if (data.isEmpty()) continue; QTextStream s(data); QString en = QHelpGlobal::codecFromData(data); s.setCodec(QTextCodec::codecForName(en.toLatin1().constData())); QString text = s.readAll(); if (text.isNull()) continue; QString title = QHelpGlobal::documentTitle(text); int j = 0; int i = 0; bool valid = true;
281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350
const QChar *buf = text.unicode(); QChar str[64]; QChar c = buf[0]; while ( j < text.length() ) { if (m_cancel) return; if ( c == QLatin1Char('<') || c == QLatin1Char('&') ) { valid = false; if ( i > 1 ) writer.insertInIndex(QString(str,i), docNum); i = 0; c = buf[++j]; continue; } if ( ( c == QLatin1Char('>') || c == QLatin1Char(';') ) && !valid ) { valid = true; c = buf[++j]; continue; } if ( !valid ) { c = buf[++j]; continue; } if ( ( c.isLetterOrNumber() || c == QLatin1Char('_') ) && i < 63 ) { str[i] = c.toLower(); ++i; } else { if ( i > 1 ) writer.insertInIndex(QString(str,i), docNum); i = 0; } c = buf[++j]; } if ( i > 1 ) writer.insertInIndex(QString(str,i), docNum); docNum++; writer.insertInDocumentList(title, url); } if (writer.writeIndex()) { engine.setCustomValue(key, addNamespace( engine.customValue(key).toString(), namespaceName)); } writer.reset(); } } QStringListIterator qsli(indexedNamespaces); while (qsli.hasNext()) { const QString namespaceName = qsli.next(); if (namespaces.contains(namespaceName)) continue; const QList<QStringList> attributeSets = engine.filterAttributeSets(namespaceName); foreach (const QStringList &attributes, attributeSets) { writer.setIndexFile(namespaceName, attributes.join(QLatin1String("@"))); writer.removeIndex(); } engine.setCustomValue(key, removeNamespace( engine.customValue(key).toString(), namespaceName)); } emit indexingFinished();
351352353354355356357358359360361362363364365366367368369370371372373374375376377
} QString QHelpSearchIndexWriter::addNamespace(const QString namespaces, const QString &namespaceName) { QString value = namespaces; if (!value.contains(namespaceName)) value.append(namespaceName).append(QLatin1String("|")); return value; } QString QHelpSearchIndexWriter::removeNamespace(const QString namespaces, const QString &namespaceName) { QString value = namespaces; if (value.contains(namespaceName)) value.remove(namespaceName + QLatin1String("|")); return value; } } // namespace std } // namespace fulltextsearch QT_END_NAMESPACE