// Last change 794e271d, authored by Antti Kokko:
//   - Renamed LICENSE.LGPL to LICENSE.LGPLv21
//   - Added LICENSE.LGPLv3
//   - Removed LICENSE.GPL
//   Change-Id: I23ef9591f4d9054e0b6a252ba7767baf4189aeab
//   Reviewed-by: Jani Heikkinen <jani.heikkinen@digia.com>
/****************************************************************************
**
** Copyright (C) 2014 Digia Plc and/or its subsidiary(-ies).
** Contact: http://www.qt-project.org/legal
**
** This file is part of the Qt Assistant of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL21$
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and Digia. For licensing terms and
** conditions see http://qt.digia.com/licensing. For further information
** use the contact form at http://qt.digia.com/contact-us.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 2.1 or version 3 as published by the Free
** Software Foundation and appearing in the file LICENSE.LGPLv21 and
** LICENSE.LGPLv3 included in the packaging of this file. Please review the
** following information to ensure the GNU Lesser General Public License
** requirements will be met: https://www.gnu.org/licenses/lgpl.html and
** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
**
** In addition, as a special exception, Digia gives you certain additional
** rights. These rights are described in the Digia Qt LGPL Exception
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
**
** $QT_END_LICENSE$
**
****************************************************************************/
#include "qhelpsearchindexwriter_default_p.h"
#include "qhelp_global.h"
#include "qhelpenginecore.h"
#include <QtCore/QDir>
#include <QtCore/QSet>
#include <QtCore/QUrl>
#include <QtCore/QFile>
#include <QtCore/QRegExp>
#include <QtCore/QVariant>
#include <QtCore/QFileInfo>
#include <QtCore/QTextCodec>
#include <QtCore/QTextStream>
QT_BEGIN_NAMESPACE
namespace fulltextsearch {
namespace std {
// Builds a writer that places its files below \a path. Both file names
// start out empty; setIndexFile() assigns them.
Writer::Writer(const QString &path)
    : indexPath(path),
      indexFile(),
      documentFile()
{
}
// Destruction delegates to reset() so that the heap-allocated index
// entries are deleted.
Writer::~Writer()
{
    this->reset();
}
void Writer::reset()
{
for(QHash<QString, Entry*>::ConstIterator it =
index.begin(); it != index.end(); ++it) {
delete it.value();
7172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
}
index.clear();
documentList.clear();
}
bool Writer::writeIndex() const
{
bool status;
QFile idxFile(indexFile);
if (!(status = idxFile.open(QFile::WriteOnly)))
return status;
QDataStream indexStream(&idxFile);
for(QHash<QString, Entry*>::ConstIterator it =
index.begin(); it != index.end(); ++it) {
indexStream << it.key();
indexStream << it.value()->documents.count();
indexStream << it.value()->documents;
}
idxFile.close();
QFile docFile(documentFile);
if (!(status = docFile.open(QFile::WriteOnly)))
return status;
QDataStream docStream(&docFile);
foreach(const QStringList &list, documentList) {
docStream << list.at(0);
docStream << list.at(1);
}
docFile.close();
return status;
}
void Writer::removeIndex() const
{
QFile idxFile(indexFile);
if (idxFile.exists())
idxFile.remove();
QFile docFile(documentFile);
if (docFile.exists())
docFile.remove();
}
void Writer::setIndexFile(const QString &namespaceName, const QString &attributes)
{
QString extension = namespaceName + QLatin1String("@") + attributes;
indexFile = indexPath + QLatin1String("/indexdb40.") + extension;
documentFile = indexPath + QLatin1String("/indexdoc40.") + extension;
}
// Records one occurrence of the term \a string in document \a docNum.
//
// The terms "amp" and "nbsp" are ignored (presumably remnants of HTML
// entities -- confirm against the tokenizer in the caller). For a term not
// seen before, a new Entry is created for \a docNum. For a known term,
// either the counter of the most recently added document is incremented
// (same document) or a new Document record is appended (different one).
void Writer::insertInIndex(const QString &string, int docNum)
{
    if (string == QLatin1String("amp") || string == QLatin1String("nbsp"))
        return;

    // value() yields a null pointer for unknown terms and, unlike
    // operator[], does not insert a placeholder into the hash.
    Entry *entry = index.value(string);
    if (!entry) {
        index.insert(string, new Entry(docNum));
        return;
    }

    if (entry->documents.last().docNumber != docNum)
        entry->documents.append(Document(docNum, 1));
    else
        entry->documents.last().frequency++;
}
void Writer::insertInDocumentList(const QString &title, const QString &url)
{
documentList.append(QStringList(title) << url);
}
// Creates an idle index writer thread with the cancel flag cleared.
// Indexing is started through updateIndex().
QHelpSearchIndexWriter::QHelpSearchIndexWriter()
    : QThread(),
      m_cancel(false)
{
}
// Requests cancellation of a running indexing job and blocks until the
// thread has finished.
QHelpSearchIndexWriter::~QHelpSearchIndexWriter()
{
    {
        // Scope the lock so that it is released before wait() blocks.
        QMutexLocker locker(&mutex);
        m_cancel = true;
        waitCondition.wakeOne();
    }

    wait();
}
void QHelpSearchIndexWriter::cancelIndexing()
{
mutex.lock();
this->m_cancel = true;
mutex.unlock();
}
void QHelpSearchIndexWriter::updateIndex(const QString &collectionFile,
const QString &indexFilesFolder,
bool reindex)
{
wait();
QMutexLocker lock(&mutex);
this->m_cancel = false;
this->m_reindex = reindex;
this->m_collectionFile = collectionFile;
this->m_indexFilesFolder = indexFilesFolder;
start(QThread::LowestPriority);
}
// Thread entry point: brings the full text search index in line with the
// documentation registered in the collection file given to updateIndex().
//
// Steps:
//  1. Copy the job parameters while holding the mutex.
//  2. Open the collection; if a reindex was requested, clear the
//     "DefaultSearchNamespaces" custom value, which holds the '|'-separated
//     list of namespaces that are already indexed.
//  3. For every registered namespace that is not yet in that list, build
//     and write one index per filter attribute set, then add the namespace
//     to the list if the index was written successfully.
//  4. For every namespace in the list that is no longer registered, remove
//     its index files and drop it from the list.
//
// indexingStarted() is emitted before step 3 and indexingFinished() after
// step 4.
// NOTE(review): when the job is cancelled the function returns immediately,
// i.e. indexingFinished() is NOT emitted after indexingStarted() -- confirm
// that listeners cope with that.
void QHelpSearchIndexWriter::run()
{
    mutex.lock();

    if (m_cancel) {
        mutex.unlock();
        return;
    }

    const bool reindex(this->m_reindex);
    const QLatin1String key("DefaultSearchNamespaces");
    const QString collectionFile(this->m_collectionFile);
    const QString indexPath = m_indexFilesFolder;

    mutex.unlock();

    QHelpEngineCore engine(collectionFile, 0);
    if (!engine.setupData())
        return;

    // Forget what has been indexed so far; everything gets indexed again.
    if (reindex)
        engine.setCustomValue(key, QLatin1String(""));

    const QStringList registeredDocs = engine.registeredDocumentations();
    const QStringList indexedNamespaces = engine.customValue(key).toString().
        split(QLatin1String("|"), QString::SkipEmptyParts);

    emit indexingStarted();

    QStringList namespaces;
    Writer writer(indexPath);
    foreach(const QString &namespaceName, registeredDocs) {
        mutex.lock();
        if (m_cancel) {
            mutex.unlock();
            return;
        }
        mutex.unlock();

        // Remember every registered namespace (needed for the cleanup pass
        // below); skip the ones that already have an index.
        namespaces.append(namespaceName);
        if (indexedNamespaces.contains(namespaceName))
            continue;

        const QList<QStringList> attributeSets =
            engine.filterAttributeSets(namespaceName);

        foreach (const QStringList &attributes, attributeSets) {
            // cleanup maybe old or unfinished files
            writer.setIndexFile(namespaceName, attributes.join(QLatin1String("@")));
            writer.removeIndex();

            QSet<QString> documentsSet;
            const QList<QUrl> docFiles = engine.files(namespaceName, attributes);
            foreach(QUrl url, docFiles) {
                // NOTE(review): m_cancel is read without holding the mutex
                // here and in the loops below, unlike the check above.
                if (m_cancel)
                    return;

                // get rid of duplicated files: URLs differing only in their
                // fragment collapse into one set element
                if (url.hasFragment())
                    url.setFragment(QString());

                // Only .html, .htm and .txt files are indexed.
                QString s = url.toString();
                if (s.endsWith(QLatin1String(".html"))
                    || s.endsWith(QLatin1String(".htm"))
                    || s.endsWith(QLatin1String(".txt")))
                    documentsSet.insert(s);
            }

            int docNum = 0;
            const QStringList documentsList(documentsSet.toList());
            foreach(const QString &url, documentsList) {
                if (m_cancel)
                    return;

                QByteArray data(engine.fileData(url));
                if (data.isEmpty())
                    continue;

                // Decode the file with the codec detected from its content.
                QTextStream s(data);
                QString en = QHelpGlobal::codecFromData(data);
                s.setCodec(QTextCodec::codecForName(en.toLatin1().constData()));

                QString text = s.readAll();
                if (text.isNull())
                    continue;

                QString title = QHelpGlobal::documentTitle(text);

                // Tokenizer state:
                //   j     - read position in the text
                //   i     - length of the word collected in str so far
                //   valid - false while inside "<...>" or "&...;"
                int j = 0;
                int i = 0;
                bool valid = true;
                const QChar *buf = text.unicode();
                QChar str[64];
                QChar c = buf[0];

                while ( j < text.length() ) {
                    if (m_cancel)
                        return;

                    // '<' or '&' ends the current word and opens a span
                    // that is excluded from indexing.
                    if ( c == QLatin1Char('<') || c == QLatin1Char('&') ) {
                        valid = false;
                        if ( i > 1 )
                            writer.insertInIndex(QString(str,i), docNum);
                        i = 0;
                        c = buf[++j];
                        continue;
                    }
                    // '>' or ';' closes the excluded span.
                    if ( ( c == QLatin1Char('>') || c == QLatin1Char(';') ) && !valid ) {
                        valid = true;
                        c = buf[++j];
                        continue;
                    }
                    if ( !valid ) {
                        c = buf[++j];
                        continue;
                    }
                    // Words consist of letters, digits and '_', are stored
                    // lower-cased and hold at most 63 characters. Any other
                    // character (or reaching the limit) flushes the word;
                    // single-character words are dropped.
                    if ( ( c.isLetterOrNumber() || c == QLatin1Char('_') ) && i < 63 ) {
                        str[i] = c.toLower();
                        ++i;
                    } else {
                        if ( i > 1 )
                            writer.insertInIndex(QString(str,i), docNum);
                        i = 0;
                    }
                    c = buf[++j];
                }
                // Flush the word that runs up to the end of the text.
                if ( i > 1 )
                    writer.insertInIndex(QString(str,i), docNum);

                docNum++;
                writer.insertInDocumentList(title, url);
            }

            // Mark the namespace as indexed only if the files were written.
            if (writer.writeIndex()) {
                engine.setCustomValue(key, addNamespace(
                    engine.customValue(key).toString(), namespaceName));
            }

            writer.reset();
        }
    }

    // Cleanup pass: namespaces that are recorded as indexed but are no
    // longer registered lose their index files and their list entry.
    QStringListIterator qsli(indexedNamespaces);
    while (qsli.hasNext()) {
        const QString namespaceName = qsli.next();
        if (namespaces.contains(namespaceName))
            continue;

        const QList<QStringList> attributeSets =
            engine.filterAttributeSets(namespaceName);

        foreach (const QStringList &attributes, attributeSets) {
            writer.setIndexFile(namespaceName, attributes.join(QLatin1String("@")));
            writer.removeIndex();
        }

        engine.setCustomValue(key, removeNamespace(
            engine.customValue(key).toString(), namespaceName));
    }

    emit indexingFinished();
}
// Returns \a namespaces with \a namespaceName appended, followed by the
// separator '|', unless the list already has an entry equal to
// \a namespaceName.
//
// \a namespaces is a '|'-separated list such as "a.b|c.d|". Entries are
// compared as a whole: a plain substring test would treat "a.b" as present
// when only "a.b.c" is listed, and the namespace would never be recorded.
// An empty \a namespaceName leaves the list unchanged.
QString QHelpSearchIndexWriter::addNamespace(const QString namespaces,
                                             const QString &namespaceName)
{
    QString value = namespaces;
    if (namespaceName.isEmpty())
        return value;

    const QStringList entries =
        value.split(QLatin1String("|"), QString::SkipEmptyParts);
    if (!entries.contains(namespaceName))
        value.append(namespaceName).append(QLatin1String("|"));
    return value;
}
// Returns \a namespaces without the entries equal to \a namespaceName.
//
// \a namespaces is a '|'-separated list such as "a.b|c.d|". Only whole
// entries are removed: removing the substring "b|" from "a.b|b|" would
// also cut the tail off "a.b". If no entry matches, or \a namespaceName is
// empty, the list is returned unchanged; otherwise it is rebuilt with
// every remaining entry followed by '|'.
QString QHelpSearchIndexWriter::removeNamespace(const QString namespaces,
                                                const QString &namespaceName)
{
    const QStringList entries =
        namespaces.split(QLatin1String("|"), QString::SkipEmptyParts);
    if (namespaceName.isEmpty() || !entries.contains(namespaceName))
        return namespaces;

    QString value;
    foreach (const QString &entry, entries) {
        if (entry != namespaceName)
            value.append(entry).append(QLatin1String("|"));
    }
    return value;
}
} // namespace std
} // namespace fulltextsearch
QT_END_NAMESPACE