opt: improve headword dialog performance

Especially noticeable when the dictionary has a very large collection of headwords.
Also includes the headword export function.
This commit is contained in:
Xiao YiFang 2022-09-19 22:19:22 +08:00
parent fdb4aae875
commit 4c8ec4e3f2
9 changed files with 394 additions and 66 deletions

View file

@ -1331,6 +1331,92 @@ void BtreeIndex::findArticleLinks( QVector< WordArticleLink > * articleLinks,
} }
} }
/// Collects headwords leaf by leaf, resuming where a previous call stopped.
///
/// Walks @p offsets in the set's iteration order, skips the first @p index
/// entries and passes every following offset to findSingleNodeHeadwords()
/// until @p headwords holds at least @p length entries or the set is exhausted.
///
/// @param offsets   node offsets to read (findHeadWordsWithLenth() passes the
///                  result of findNodes()).
/// @param index     in/out cursor: number of entries of @p offsets already
///                  consumed; incremented once for every node processed here.
/// @param headwords destination set; the call does nothing when it is null.
/// @param length    soft limit - the node that crosses it is still added whole.
void BtreeIndex::findHeadWords( QSet<uint32_t> offsets,int& index,
                                QSet< QString > * headwords,
                                uint32_t length )
{
  // Without a destination there is nothing to collect, and the size check
  // below would dereference a null pointer.
  if( !headwords )
    return;

  // NOTE(review): resuming by position assumes the set is iterated in the same
  // order on every call for the same contents - confirm.
  // Const iterators are used so the by-value copy is never detached.
  int position = 0;
  for( auto it = offsets.constBegin(); it != offsets.constEnd(); ++it, ++position )
  {
    if( position < index )
      continue; // already consumed by an earlier call

    findSingleNodeHeadwords( *it, headwords );
    ++index;

    if( headwords->size() >= length )
      break;
  }
}
/// Reads one node from the index file and adds every headword stored in its
/// chains to @p headwords.
///
/// @param offsets   file offset of the node to read (a single offset, despite
///                  the plural name).
/// @param headwords destination set; the call is a no-op when it is null.
void BtreeIndex::findSingleNodeHeadwords( uint32_t offsets,
                                          QSet< QString > * headwords)
{
  // Nothing can be reported without a destination, so skip the read entirely.
  if( !headwords )
    return;

  Mutex::Lock _( *idxFileMutex );

  vector< char > nodeData;
  readNode( offsets, nodeData );

  // front() on an empty vector is undefined, and the chain area only starts
  // after the leading uint32_t - bail out if there is nothing behind it.
  if( nodeData.size() <= sizeof( uint32_t ) )
    return;

  char const * const nodeBegin = &nodeData.front();
  char const * const nodeEnd = nodeBegin + nodeData.size();

  // Skip the leading uint32_t of the node; the chains follow it.
  char const * chainPtr = nodeBegin + sizeof( uint32_t );

  // readChain() is expected to move chainPtr past the chain it decoded, which
  // is what makes this loop reach nodeEnd.
  while( chainPtr < nodeEnd )
  {
    vector< WordArticleLink > chain = readChain( chainPtr );

    for( WordArticleLink const & link : chain )
    {
      headwords->insert( QString::fromUtf8( ( link.prefix + link.word ).c_str() ) );
    }
  }
}
/// Returns the child offsets recorded in the root node of the index.
///
/// The root node is read from the file on the first request only. The result
/// is the set of ( indexNodeSize + 1 ) uint32_t values that follow the first
/// uint32_t of the root node.
///
/// NOTE(review): this treats the root as an inner node whose children are all
/// leaves (tree height of 2). A root that is itself a leaf, or a deeper tree,
/// is not handled here - confirm against the index builder.
QSet<uint32_t> BtreeIndex::findNodes()
{
  Mutex::Lock _( *idxFileMutex );

  if( !rootNodeLoaded )
  {
    // Time to load our root node. We do it only once, at the first request.
    readNode( rootOffset, rootNode );
    rootNodeLoaded = true;
  }

  char const * rootData = &rootNode.front();

  // The child offsets start right after the first uint32_t of the node.
  uint32_t const * child = reinterpret_cast< uint32_t const * >( rootData ) + 1;

  QSet< uint32_t > leafOffsets;
  for( int slot = 0; slot < ( indexNodeSize + 1 ); ++slot )
    leafOffsets.insert( *child++ );

  return leafOffsets;
}
void BtreeIndex::getHeadwordsFromOffsets( QList<uint32_t> & offsets, void BtreeIndex::getHeadwordsFromOffsets( QList<uint32_t> & offsets,
QVector<QString> & headwords, QVector<QString> & headwords,
QAtomicInt * isCancelled ) QAtomicInt * isCancelled )
@ -1481,6 +1567,12 @@ bool BtreeDictionary::getHeadwords( QStringList &headwords )
return headwords.size() > 0; return headwords.size() > 0;
} }
/// Fetches the next batch of headwords from the index.
///
/// @param index     in/out cursor into the node list returned by findNodes();
///                  advanced by findHeadWords() for every node it reads.
/// @param headwords destination set.
/// @param length    batch size limit forwarded to findHeadWords().
void BtreeDictionary::findHeadWordsWithLenth( int & index, QSet< QString > * headwords, uint32_t length )
{
  // The node list is recomputed on every call; `index` tells findHeadWords()
  // how many of its entries to skip.
  findHeadWords( findNodes(), index, headwords, length );
}
void BtreeDictionary::getArticleText(uint32_t, QString &, QString & ) void BtreeDictionary::getArticleText(uint32_t, QString &, QString & )
{ {
} }

View file

@ -95,6 +95,11 @@ public:
QSet< QString > * headwords, QSet< QString > * headwords,
QAtomicInt * isCancelled = 0 ); QAtomicInt * isCancelled = 0 );
/// Reads headwords from the nodes listed in `offsets`, skipping the first
/// `index` entries, until `headwords` holds at least `length` items.
/// `index` is incremented once for every node that is read.
void findHeadWords( QSet<uint32_t> offsets,int& index, QSet< QString > * headwords, uint32_t length );
/// Reads the single node at file offset `offsets` and inserts every headword
/// found in its chains into `headwords` (nothing is inserted when it is null).
void findSingleNodeHeadwords( uint32_t offsets,
QSet< QString > * headwords);
/// Returns the node offsets stored in the root node (the root node is loaded
/// on first use).
QSet<uint32_t> findNodes( );
/// Retrieve headwords for presented article addresses /// Retrieve headwords for presented article addresses
void getHeadwordsFromOffsets( QList< uint32_t > & offsets, void getHeadwordsFromOffsets( QList< uint32_t > & offsets,
QVector< QString > & headwords, QVector< QString > & headwords,
@ -174,6 +179,7 @@ public:
{ return true; } { return true; }
virtual bool getHeadwords( QStringList &headwords ); virtual bool getHeadwords( QStringList &headwords );
/// Appends the next batch of headwords to `headwords`. The int reference is a
/// cursor into the node list from findNodes() and is advanced by the call;
/// `length` is the batch size limit passed on to findHeadWords().
virtual void findHeadWordsWithLenth( int &, QSet< QString > * headwords, uint32_t length );
virtual void getArticleText( uint32_t articleAddress, QString & headword, QString & text ); virtual void getArticleText( uint32_t articleAddress, QString & headword, QString & text );

View file

@ -51,9 +51,7 @@ DictHeadwords::DictHeadwords( QWidget *parent, Config::Class & cfg_,
ui.matchCase->setChecked( cfg.headwordsDialog.matchCase ); ui.matchCase->setChecked( cfg.headwordsDialog.matchCase );
model = new QStringListModel( this ); model = new HeadwordListModel( this );
model->setStringList( headers );
proxy = new QSortFilterProxyModel( this ); proxy = new QSortFilterProxyModel( this );
proxy->setSourceModel( model ); proxy->setSourceModel( model );
@ -134,16 +132,12 @@ void DictHeadwords::setup( Dictionary::Class *dict_ )
setWindowTitle( QString::fromUtf8( dict->getName().c_str() ) ); setWindowTitle( QString::fromUtf8( dict->getName().c_str() ) );
headers.clear(); auto size = dict->getWordCount();
model->setStringList( headers ); model->setDict(dict);
dict->getHeadwords( headers );
model->setStringList( headers );
proxy->sort( 0 ); proxy->sort( 0 );
filterChanged(); filterChanged();
if( headers.size() > AUTO_APPLY_LIMIT ) if( size > AUTO_APPLY_LIMIT )
{ {
cfg.headwordsDialog.autoApply = ui.autoApply->isChecked(); cfg.headwordsDialog.autoApply = ui.autoApply->isChecked();
ui.autoApply->setChecked( false ); ui.autoApply->setChecked( false );
@ -169,7 +163,7 @@ void DictHeadwords::savePos()
cfg.headwordsDialog.searchMode = ui.searchModeCombo->currentIndex(); cfg.headwordsDialog.searchMode = ui.searchModeCombo->currentIndex();
cfg.headwordsDialog.matchCase = ui.matchCase->isChecked(); cfg.headwordsDialog.matchCase = ui.matchCase->isChecked();
if( headers.size() <= AUTO_APPLY_LIMIT ) if( model->totalCount() <= AUTO_APPLY_LIMIT )
cfg.headwordsDialog.autoApply = ui.autoApply->isChecked(); cfg.headwordsDialog.autoApply = ui.autoApply->isChecked();
cfg.headwordsDialog.headwordsDialogGeometry = saveGeometry(); cfg.headwordsDialog.headwordsDialogGeometry = saveGeometry();
@ -226,12 +220,15 @@ void DictHeadwords::filterChanged()
QString pattern; QString pattern;
switch( syntax ) switch( syntax )
{ {
case QRegExp::FixedString: pattern = QRegularExpression::escape( ui.filterLine->text() ); case QRegExp::FixedString:
break; pattern = QRegularExpression::escape( ui.filterLine->text() );
case QRegExp::WildcardUnix: pattern = wildcardsToRegexp( ui.filterLine->text() ); break;
break; case QRegExp::WildcardUnix:
default: pattern = ui.filterLine->text(); pattern = wildcardsToRegexp( ui.filterLine->text() );
break; break;
default:
pattern = ui.filterLine->text();
break;
} }
QRegularExpression regExp( pattern, options ); QRegularExpression regExp( pattern, options );
@ -244,9 +241,9 @@ void DictHeadwords::filterChanged()
QApplication::setOverrideCursor( Qt::WaitCursor ); QApplication::setOverrideCursor( Qt::WaitCursor );
model->setFilter(regExp);
proxy->setFilterRegularExpression( regExp ); proxy->setFilterRegularExpression( regExp );
proxy->sort( 0 ); proxy->sort( 0 );
QApplication::restoreOverrideCursor(); QApplication::restoreOverrideCursor();
@ -272,8 +269,7 @@ void DictHeadwords::autoApplyStateChanged( int state )
void DictHeadwords::showHeadwordsNumber() void DictHeadwords::showHeadwordsNumber()
{ {
ui.headersNumber->setText( tr( "Unique headwords total: %1, filtered: %2" ) ui.headersNumber->setText( tr( "Unique headwords total: %1, filtered: %2" )
.arg( QString::number( headers.size() ) ) .arg( QString::number( model->totalCount() ), QString::number( proxy->rowCount() ) ) );
.arg( QString::number( proxy->rowCount() ) ) );
} }
void DictHeadwords::saveHeadersToFile() void DictHeadwords::saveHeadersToFile()
@ -303,7 +299,7 @@ void DictHeadwords::saveHeadersToFile()
if ( !file.open( QFile::WriteOnly | QIODevice::Text ) ) if ( !file.open( QFile::WriteOnly | QIODevice::Text ) )
break; break;
int headwordsNumber = proxy->rowCount(); int headwordsNumber = model->totalCount();
// Setup progress dialog // Setup progress dialog
int n = headwordsNumber; int n = headwordsNumber;
@ -327,7 +323,7 @@ void DictHeadwords::saveHeadersToFile()
// Write headwords // Write headwords
int i; int i;
for( i = 0; i < headwordsNumber; ++i ) for( i = 0; i < headwordsNumber&&i<model->wordCount(); ++i )
{ {
if( i % step == 0 ) if( i % step == 0 )
progress.setValue( i / step ); progress.setValue( i / step );
@ -335,7 +331,7 @@ void DictHeadwords::saveHeadersToFile()
if( progress.wasCanceled() ) if( progress.wasCanceled() )
break; break;
QVariant value = proxy->data( proxy->index( i, 0 ) ); QVariant value = model->getRow(i);
if( !value.canConvert< QString >() ) if( !value.canConvert< QString >() )
continue; continue;
@ -350,6 +346,27 @@ void DictHeadwords::saveHeadersToFile()
break; break;
} }
//continue to write the remaining headword
int nodeIndex = model->getCurrentIndex();
auto headwords = model->getRemainRows(nodeIndex);
while(!headwords.isEmpty())
{
if( progress.wasCanceled() )
break;
for(auto & w:headwords){
//progress
if( ++i % step == 0 )
progress.setValue( i / step );
line = w.toUtf8();
line += "\n";
if ( file.write( line ) != line.size() )
break;
}
headwords = model->getRemainRows(nodeIndex);
}
if( i < headwordsNumber && !progress.wasCanceled() ) if( i < headwordsNumber && !progress.wasCanceled() )
break; break;

View file

@ -14,6 +14,7 @@
#include "dictionary.hh" #include "dictionary.hh"
#include "delegate.hh" #include "delegate.hh"
#include "helpwindow.hh" #include "helpwindow.hh"
#include "headwordslistmodel.h"
class DictHeadwords : public QDialog class DictHeadwords : public QDialog
{ {
@ -29,8 +30,8 @@ public:
protected: protected:
Config::Class & cfg; Config::Class & cfg;
Dictionary::Class * dict; Dictionary::Class * dict;
QStringList headers;
QStringListModel * model; HeadwordListModel * model;
QSortFilterProxyModel * proxy; QSortFilterProxyModel * proxy;
WordListItemDelegate * delegate; WordListItemDelegate * delegate;
QString dictId; QString dictId;

View file

@ -16,9 +16,49 @@
<layout class="QVBoxLayout" name="verticalLayout_3"> <layout class="QVBoxLayout" name="verticalLayout_3">
<item> <item>
<layout class="QGridLayout" name="gridLayout"> <layout class="QGridLayout" name="gridLayout">
<item row="1" column="1">
<widget class="QCheckBox" name="autoApply">
<property name="toolTip">
<string>If checked any filter changes will we immediately applied to headwords list</string>
</property>
<property name="text">
<string>Auto apply</string>
</property>
</widget>
</item>
<item row="3" column="0"> <item row="3" column="0">
<widget class="QListView" name="headersListView"/> <widget class="QListView" name="headersListView"/>
</item> </item>
<item row="2" column="1">
<widget class="QPushButton" name="applyButton">
<property name="toolTip">
<string>Press this button to apply filter to headwords list</string>
</property>
<property name="text">
<string>Apply</string>
</property>
<property name="autoDefault">
<bool>false</bool>
</property>
<property name="default">
<bool>true</bool>
</property>
</widget>
</item>
<item row="2" column="0">
<widget class="QLineEdit" name="filterLine">
<property name="toolTip">
<string>Filter string (fixed string, wildcards or regular expression)</string>
</property>
</widget>
</item>
<item row="1" column="0">
<widget class="QLabel" name="label">
<property name="text">
<string>Filter:</string>
</property>
</widget>
</item>
<item row="3" column="1"> <item row="3" column="1">
<layout class="QVBoxLayout" name="verticalLayout_2"> <layout class="QVBoxLayout" name="verticalLayout_2">
<item> <item>
@ -95,46 +135,6 @@
</item> </item>
</layout> </layout>
</item> </item>
<item row="2" column="1">
<widget class="QPushButton" name="applyButton">
<property name="toolTip">
<string>Press this button to apply filter to headwords list</string>
</property>
<property name="text">
<string>Apply</string>
</property>
<property name="autoDefault">
<bool>false</bool>
</property>
<property name="default">
<bool>true</bool>
</property>
</widget>
</item>
<item row="1" column="1">
<widget class="QCheckBox" name="autoApply">
<property name="toolTip">
<string>If checked any filter changes will we immediately applied to headwords list</string>
</property>
<property name="text">
<string>Auto apply</string>
</property>
</widget>
</item>
<item row="1" column="0">
<widget class="QLabel" name="label">
<property name="text">
<string>Filter:</string>
</property>
</widget>
</item>
<item row="2" column="0">
<widget class="QLineEdit" name="filterLine">
<property name="toolTip">
<string>Filter string (fixed string, wildcards or regular expression)</string>
</property>
</widget>
</item>
</layout> </layout>
</item> </item>
<item> <item>

View file

@ -443,6 +443,7 @@ public:
/// Retrieve all dictionary headwords /// Retrieve all dictionary headwords
virtual bool getHeadwords( QStringList & ) virtual bool getHeadwords( QStringList & )
{ return false; } { return false; }
/// Retrieve dictionary headwords in batches. This default implementation
/// does nothing and leaves all arguments untouched.
virtual void findHeadWordsWithLenth( int &, QSet< QString > * headwords, uint32_t length ){}
/// Enable/disable search via synonyms /// Enable/disable search via synonyms
void setSynonymSearchEnabled( bool enabled ) void setSynonymSearchEnabled( bool enabled )

View file

@ -263,6 +263,7 @@ HEADERS += folding.hh \
base/globalregex.hh \ base/globalregex.hh \
globalbroadcaster.h \ globalbroadcaster.h \
iframeschemehandler.h \ iframeschemehandler.h \
headwordslistmodel.h \
inc_case_folding.hh \ inc_case_folding.hh \
inc_diacritic_folding.hh \ inc_diacritic_folding.hh \
mainwindow.hh \ mainwindow.hh \
@ -406,6 +407,7 @@ SOURCES += folding.cc \
base/globalregex.cc \ base/globalregex.cc \
globalbroadcaster.cpp \ globalbroadcaster.cpp \
iframeschemehandler.cpp \ iframeschemehandler.cpp \
headwordslistmodel.cpp \
main.cc \ main.cc \
dictionary.cc \ dictionary.cc \
config.cc \ config.cc \

161
headwordslistmodel.cpp Normal file
View file

@ -0,0 +1,161 @@
#include "headwordslistmodel.h"
#include "wstring_qt.hh"
/// Creates an empty model that is not bound to any dictionary yet.
///
/// Every scalar/pointer member is given a defined value here: canFetchMore()
/// and data() read totalSize, and they can run before setDict() has been
/// called once the model is attached to a proxy or view.
HeadwordListModel::HeadwordListModel(QObject *parent)
  : QAbstractListModel(parent),
    totalSize(0),
    _dict(nullptr),
    index(0),
    ptr(nullptr)
{}
/// Number of headwords loaded so far; 0 for any valid parent, since this is a
/// flat list.
int HeadwordListModel::rowCount(const QModelIndex &parent) const
{
  if( parent.isValid() )
    return 0;

  return words.size();
}
/// Word count reported by the dictionary when setDict() was called.
int HeadwordListModel::totalCount() const
{
  return static_cast< int >( totalSize );
}
/// True once at least totalSize headwords have been loaded into the model.
bool HeadwordListModel::isFinish() const
{
  const bool stillLoading = words.size() < totalSize;
  return !stillLoading;
}
/// Returns the headword at position @p row of the sorted list of loaded words.
///
/// The sorted copy is cached in fileSortedList. The list of loaded words only
/// grows, so a size mismatch means the cache is stale and must be rebuilt;
/// the original built it once and never refreshed it.
///
/// @param row zero-based position in the sorted list.
/// @return the headword, or a null QString when @p row is out of range.
QString HeadwordListModel::getRow(int row)
{
  if( fileSortedList.size() != words.size() )
  {
    fileSortedList = words;
    fileSortedList.sort();
  }

  // at() must not be called with an out-of-range position.
  if( row < 0 || row >= fileSortedList.size() )
    return QString();

  return fileSortedList.at( row );
}
/// Starts a prefix search in the dictionary for the filter pattern so that
/// matching headwords get merged into the model by requestFinished().
/// An empty pattern is ignored.
void HeadwordListModel::setFilter(QRegularExpression reg)
{
  const QString pattern = reg.pattern();

  if( !pattern.isEmpty() )
  {
    // Results of a previous filter are no longer relevant.
    filterWords.clear();

    // 30 is passed as the second argument of prefixMatch() - presumably the
    // maximum number of results; confirm against Dictionary::Class.
    auto request = _dict->prefixMatch( gd::toWString( pattern ), 30 );

    connect( request.get(), SIGNAL( finished() ),
             this, SLOT( requestFinished() ), Qt::QueuedConnection );

    queuedRequests.push_back( request );
  }
}
void HeadwordListModel::requestFinished()
{
// See how many new requests have finished, and if we have any new results
for( std::list< sptr< Dictionary::WordSearchRequest > >::iterator i =
queuedRequests.begin(); i != queuedRequests.end(); )
{
if ( (*i)->isFinished() )
{
if ( !(*i)->getErrorString().isEmpty() )
{
qDebug()<<"error:"<<(*i)->getErrorString();
}
if ( (*i)->matchesCount() )
{
auto allmatches = (*i)->getAllMatches();
for(auto& match:allmatches)
filterWords.append(gd::toQString(match.word));
}
queuedRequests.erase( i++ );
}
else
++i;
}
if(queuedRequests.empty()){
QStringList filtered;
for(auto& w:filterWords){
if(!words.contains(w)){
filtered<<w;
}
}
beginInsertRows(QModelIndex(), words.size(), words.size() + filtered.count() - 1);
for(const auto & word:filtered)
words.append(word);
endInsertRows();
}
}
/// Number of headwords currently held by the model.
int HeadwordListModel::wordCount() const
{
  return words.size();
}
/// Returns the headword for Qt::DisplayRole; an invalid QVariant for any other
/// role, an invalid index, or a row outside the loaded range.
QVariant HeadwordListModel::data(const QModelIndex &index, int role) const
{
  if( !index.isValid() )
    return QVariant();

  const int row = index.row();
  const bool rowInRange = row >= 0 && row < totalSize && row < words.size();

  if( !rowInRange || role != Qt::DisplayRole )
    return QVariant();

  return words.at( row );
}
/// More rows can be fetched for the (invalid) root index as long as fewer than
/// totalSize headwords have been loaded.
bool HeadwordListModel::canFetchMore(const QModelIndex &parent) const
{
  return !parent.isValid() && words.size() < totalSize;
}
/// Loads the next batch of headwords from the dictionary and appends the ones
/// that are not in the model yet.
///
/// The member cursor `index` is advanced by the dictionary call, so each
/// invocation continues where the previous one stopped. numberPopulated() is
/// emitted with the new total whenever the dictionary returned any headword.
void HeadwordListModel::fetchMore(const QModelIndex &parent)
{
  if( parent.isValid() )
    return;

  QSet< QString > headword;
  Mutex::Lock _( lock );

  _dict->findHeadWordsWithLenth( index, &headword, 10000 );
  if( headword.isEmpty() )
    return;

  // NOTE(review): words.contains() scans the whole list for every candidate;
  // a QSet member mirroring `words` would make this check cheaper.
  QSet< QString > filtered;
  for( const auto & word : qAsConst( headword ) )
  {
    if( !words.contains( word ) )
      filtered.insert( word );
  }

  // beginInsertRows() requires first <= last; when every fetched word is
  // already present there is nothing to insert and nothing to announce.
  if( !filtered.isEmpty() )
  {
    beginInsertRows( QModelIndex(), words.size(), words.size() + filtered.count() - 1 );
    for( const auto & word : qAsConst( filtered ) )
      words.append( word );
    endInsertRows();
  }

  emit numberPopulated( words.size() );
}
/// Current value of the cursor that fetchMore() passes to the dictionary.
int HeadwordListModel::getCurrentIndex()
{
  const int cursor = index;
  return cursor;
}
/// Reads the next batch of headwords starting at @p nodeIndex without adding
/// them to the model, and returns those the model does not hold yet.
///
/// @param nodeIndex in/out cursor handed to the dictionary; advanced by the call.
QSet<QString> HeadwordListModel::getRemainRows(int & nodeIndex)
{
  QSet< QString > batch;
  Mutex::Lock _( lock );

  _dict->findHeadWordsWithLenth( nodeIndex, &batch, 10000 );

  QSet< QString > unseen;
  for( auto it = batch.constBegin(); it != batch.constEnd(); ++it )
  {
    const bool alreadyLoaded = words.contains( *it );
    if( !alreadyLoaded )
      unseen.insert( *it );
  }

  return unseen;
}
/// Binds the model to @p dict and starts over with an empty list.
///
/// Headwords, caches, the fetch cursor and pending filter requests left over
/// from a previously shown dictionary are dropped, so they cannot leak into
/// the new one. Attached views are notified through a model reset.
///
/// @param dict dictionary to read from; with a null pointer the model stays
///             empty and reports a total of 0.
void HeadwordListModel::setDict(Dictionary::Class * dict)
{
  beginResetModel();

  _dict = dict;

  words.clear();
  filterWords.clear();
  fileSortedList.clear();
  queuedRequests.clear();
  index = 0;

  totalSize = _dict ? _dict->getWordCount() : 0;

  endResetModel();
}

48
headwordslistmodel.h Normal file
View file

@ -0,0 +1,48 @@
#ifndef HEADWORDSLISTMODEL_H
#define HEADWORDSLISTMODEL_H
#include "dictionary.hh"
#include <QAbstractListModel>
#include <QStringList>
/// List model that exposes the headwords of one dictionary and loads them
/// incrementally through canFetchMore()/fetchMore() instead of all at once.
class HeadwordListModel : public QAbstractListModel
{
  Q_OBJECT

public:
  HeadwordListModel(QObject *parent = nullptr);

  /// Number of headwords loaded so far (0 for a valid parent).
  int rowCount(const QModelIndex &parent = QModelIndex()) const override;
  /// Word count reported by the dictionary at setDict() time.
  int totalCount() const;
  /// Number of headwords currently held by the model.
  int wordCount() const;
  /// True once at least totalCount() headwords have been loaded.
  bool isFinish() const;
  QVariant data(const QModelIndex &index, int role = Qt::DisplayRole) const override;
  /// Headword at position `row` of the sorted list of loaded words (used for export).
  QString getRow(int row);
  /// Starts a dictionary prefix search for the pattern of the given expression.
  void setFilter(QRegularExpression);
  /// Current fetch cursor, suitable as the start value for getRemainRows().
  int getCurrentIndex();
  /// Next batch of headwords not yet in the model; advances `nodeIndex`.
  QSet<QString> getRemainRows(int & nodeIndex);

signals:
  void numberPopulated(int number);
  void finished(int number);

public slots:
  void setDict(Dictionary::Class * dict);
  void requestFinished();

protected:
  bool canFetchMore(const QModelIndex &parent) const override;
  void fetchMore(const QModelIndex &parent) override;

private:
  QStringList words;          // headwords loaded so far, in load order
  QStringList filterWords;    // matches collected from finished filter requests
  QStringList fileSortedList; // sorted copy of `words` built by getRow()

  // Default initialisers keep these members defined until setDict() is called.
  long totalSize = 0;
  Dictionary::Class * _dict = nullptr;
  int index = 0;              // cursor passed to findHeadWordsWithLenth() by fetchMore()
  char* ptr = nullptr;
  Mutex lock;
  std::list< sptr< Dictionary::WordSearchRequest > > queuedRequests;
};
#endif // HEADWORDSLISTMODEL_H