goldendict-ng/fulltextsearch.cc
2022-10-11 22:09:15 +08:00

822 lines
23 KiB
C++

/* This file is (c) 2014 Abs62
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#include "fulltextsearch.hh"
#include "ftshelpers.hh"
#include "gddebug.hh"
#include "mainwindow.hh"
#include "utils.hh"
#include <QThreadPool>
#include <QIntValidator>
#include <QMessageBox>
#include <qalgorithms.h>
#if defined( Q_OS_WIN32 )
#include "initializing.hh"
#include <qt_windows.h>
#include <QOperatingSystemVersion>
#endif
#include "base/globalregex.hh"
namespace FTS
{
/// Bounds for the user-adjustable search limits. The dialog constructor uses
/// them as the minimum/maximum of the matching spin boxes and prints them in
/// the labels of the corresponding check boxes.
enum
{
// Range of the "max distance between words" spin box
MinDistanceBetweenWords = 0,
MaxDistanceBetweenWords = 15,
// Range of the "max articles per dictionary" spin box
MinArticlesPerDictionary = 1,
MaxArticlesPerDictionary = 10000
};
/// Worker entry point: creates the missing full-text indexes.
///
/// Every dictionary that reports canFTS() but not haveFTSIndex() is handed to
/// makeFTSIndex(), and its name is announced through sendNowIndexingName()
/// beforehand. The dictionaries are walked twice: first with the second
/// argument of makeFTSIndex() set to true, then with it set to false. Either
/// walk stops as soon as the shared cancellation flag is raised.
///
/// The timer thread is started before the work begins and is always stopped
/// again before returning, including when an indexer throws. An empty name is
/// emitted at the very end.
void Indexing::run()
{
  try
  {
    timerThread->start();

    // First iteration - dictionaries with no more than
    // MaxDictionarySizeForFastSearch articles (second argument true).
    // Second iteration - all remaining dictionaries (second argument false).
    bool const passes[] = { true, false };
    for( bool const firstIteration : passes )
    {
      for( auto const & dict : dictionaries )
      {
        if( Utils::AtomicInt::loadAcquire( isCancelled ) )
          break;

        if( dict->canFTS() && !dict->haveFTSIndex() )
        {
          emit sendNowIndexingName( QString::fromUtf8( dict->getName().c_str() ) );
          dict->makeFTSIndex( isCancelled, firstIteration );
        }
      }
    }
  }
  catch( std::exception const & ex )
  {
    gdWarning( "Exception occured while full-text search: %s", ex.what() );
  }

  // Shut the timer thread down on every path; previously an exception skipped
  // this and left the thread running.
  timerThread->quit();
  timerThread->wait();

  emit sendNowIndexingName( QString() );
}
/// Progress report: for every dictionary whose indexing progress is strictly
/// between 0 and 100, emits its name followed by the progress text. Stops at
/// once when the cancellation flag is set.
void Indexing::timeout()
{
  for( auto const & dict : dictionaries )
  {
    if( Utils::AtomicInt::loadAcquire( isCancelled ) )
      return;

    auto progress = dict->getIndexingFtsProgress();
    if( progress <= 0 || progress >= 100 )
      continue; // not started yet, or already complete

    emit sendNowIndexingName( QString::fromUtf8( dict->getName().c_str() )
                              + QString( "......%1%2" ).arg( "%" ).arg( progress ) );
  }
}
/// Remembers the dictionaries to be indexed; no indexing run is marked as
/// started yet.
FtsIndexing::FtsIndexing( std::vector< sptr< Dictionary::Class > > const & dicts ) :
  dictionaries( dicts ),
  started( false )
{}
void FtsIndexing::doIndexing()
{
if( started )
stopIndexing();
if( !started )
{
while( Utils::AtomicInt::loadAcquire( isCancelled ) )
isCancelled.deref();
Indexing *idx = new Indexing( isCancelled, dictionaries, indexingExited );
connect( idx, SIGNAL( sendNowIndexingName( QString ) ), this, SLOT( setNowIndexedName( QString ) ) );
QThreadPool::globalInstance()->start( idx );
started = true;
}
}
/// Cancels a running indexing pass: raises the cancellation flag, blocks on
/// the indexingExited semaphore (presumably released by the worker when it is
/// done - verify against Indexing), and clears the "now indexing" name.
/// Does nothing when no run has been started.
void FtsIndexing::stopIndexing()
{
  if( !started )
    return;

  bool const alreadyCancelled = Utils::AtomicInt::loadAcquire( isCancelled );
  if( !alreadyCancelled )
    isCancelled.ref();

  indexingExited.acquire();
  started = false;

  setNowIndexedName( QString() );
}
/// Stores the name of the dictionary being indexed under nameMutex and then
/// announces it through newIndexingName(); the signal is emitted after the
/// lock has been released.
void FtsIndexing::setNowIndexedName( QString name )
{
  {
    Mutex::Lock guard( nameMutex );
    nowIndexing = name;
  } // lock ends here, before the signal goes out

  emit newIndexingName( name );
}
/// Returns a copy of the currently stored "now indexing" name, read under
/// nameMutex.
QString FtsIndexing::nowIndexingName()
{
  Mutex::Lock guard( nameMutex );
  QString const current = nowIndexing;
  return current;
}
/// Merges add_list into base_list; both lists are walked front to back as in
/// a classic merge of two sorted sequences.
///
/// Entries that compare equal are combined into the base entry: the dictionary
/// IDs of the added entry are appended, and highlight patterns are appended
/// unless the base entry already contains them. All other entries are copied
/// in the order given by operator<.
void addSortedHeadwords( QList< FtsHeadword > & base_list, QList< FtsHeadword > const & add_list)
{
  if( add_list.isEmpty() )
    return;

  if( base_list.isEmpty() )
  {
    base_list = add_list;
    return;
  }

  QList< FtsHeadword > merged;
  merged.reserve( base_list.size() + add_list.size() );

  QList< FtsHeadword >::iterator basePos = base_list.begin();
  QList< FtsHeadword >::const_iterator addPos = add_list.constBegin();

  // Interleave while both lists still have entries.
  while( basePos != base_list.end() && addPos != add_list.constEnd() )
  {
    if( *addPos < *basePos )
    {
      merged.append( *addPos );
      ++addPos;
    }
    else if( *addPos == *basePos )
    {
      // Same headword: fold the added entry into the base one. The base entry
      // itself is copied to the result later, once it is passed.
      basePos->dictIDs.append( addPos->dictIDs );

      for( QString const & pattern : addPos->foundHiliteRegExps )
      {
        if( !basePos->foundHiliteRegExps.contains( pattern ) )
          basePos->foundHiliteRegExps.append( pattern );
      }

      ++addPos;
    }
    else
    {
      merged.append( *basePos );
      ++basePos;
    }
  }

  // At most one of the two lists still has a tail; copy it over verbatim.
  for( ; addPos != add_list.constEnd(); ++addPos )
    merged.append( *addPos );

  for( ; basePos != base_list.end(); ++basePos )
    merged.append( *basePos );

  base_list.swap( merged );
}
/// Builds the full-text search dialog.
///
/// Restores the saved geometry and search options from cfg_, fills the search
/// mode combo box, wires up all signal/slot connections, creates the result
/// model and item delegate, and pre-fills the search line with the main
/// window's translate-line text when the parent is a MainWindow.
///
/// @param parent        parent widget
/// @param cfg_          configuration the FTS preferences are read from
/// @param dictionaries_ all available dictionaries
/// @param groups_       dictionary groups
/// @param ftsidx        indexing controller whose progress is displayed
FullTextSearchDialog::FullTextSearchDialog( QWidget * parent,
                                            Config::Class & cfg_,
                                            std::vector< sptr< Dictionary::Class > > const & dictionaries_,
                                            std::vector< Instances::Group > const & groups_,
                                            FtsIndexing & ftsidx ) :
  QDialog( parent ),
  cfg( cfg_ ),
  dictionaries( dictionaries_ ),
  groups( groups_ ),
  group( 0 ),
  ignoreWordsOrder( cfg_.preferences.fts.ignoreWordsOrder ),
  ignoreDiacritics( cfg_.preferences.fts.ignoreDiacritics ),
  ftsIdx( ftsidx ),
  helpAction( this )
{
  ui.setupUi( this );

  setAttribute( Qt::WA_DeleteOnClose, false );
  setWindowFlags( windowFlags() & ~Qt::WindowContextHelpButtonHint );

  setWindowTitle( tr( "Full-text search" ) );

  if( cfg.preferences.fts.dialogGeometry.size() > 0 )
    restoreGeometry( cfg.preferences.fts.dialogGeometry );

  // Show the current indexing state and keep it up to date.
  setNewIndexingName( ftsIdx.nowIndexingName() );

  connect( &ftsIdx, SIGNAL( newIndexingName( QString ) ),
           this, SLOT( setNewIndexingName( QString ) ) );

  ui.searchMode->addItem( tr( "Whole words" ), WholeWords );
  ui.searchMode->addItem( tr( "Plain text"), PlainText );
  ui.searchMode->addItem( tr( "Wildcards" ), Wildcards );
#ifndef USE_XAPIAN
  ui.searchMode->addItem( tr( "RegExp" ), RegExp );
#else
  // These options are hidden in Xapian builds.
  ui.matchCase->hide();
  ui.articlesPerDictionary->hide();
  ui.checkBoxArticlesPerDictionary->hide();
  ui.checkBoxIgnoreDiacritics->hide();
  ui.checkBoxDistanceBetweenWords->hide();
  ui.distanceBetweenWords->hide();
  ui.checkBoxIgnoreWordOrder->hide();
  ui.searchLine->setToolTip(tr("support xapian search syntax,such as AND OR +/- etc"));
#endif
  ui.searchMode->setCurrentIndex( cfg.preferences.fts.searchMode );

  ui.searchProgressBar->hide();

  ui.checkBoxDistanceBetweenWords->setText( tr( "Max distance between words (%1-%2):" )
                                            .arg( QString::number( MinDistanceBetweenWords ) )
                                            .arg( QString::number( MaxDistanceBetweenWords ) ) );
  ui.checkBoxDistanceBetweenWords->setChecked( cfg.preferences.fts.useMaxDistanceBetweenWords );

  ui.distanceBetweenWords->setMinimum( MinDistanceBetweenWords );
  ui.distanceBetweenWords->setMaximum( MaxDistanceBetweenWords );
  ui.distanceBetweenWords->setValue( cfg.preferences.fts.maxDistanceBetweenWords );

  ui.checkBoxArticlesPerDictionary->setText( tr( "Max articles per dictionary (%1-%2):" )
                                             .arg( QString::number( MinArticlesPerDictionary ) )
                                             .arg( QString::number( MaxArticlesPerDictionary ) ) );
  ui.checkBoxArticlesPerDictionary->setChecked( cfg.preferences.fts.useMaxArticlesPerDictionary );

  ui.articlesPerDictionary->setMinimum( MinArticlesPerDictionary );
  ui.articlesPerDictionary->setMaximum( MaxArticlesPerDictionary );
  ui.articlesPerDictionary->setValue( cfg.preferences.fts.maxArticlesPerDictionary );

  // "Ignore word order" is only offered in the whole-words / plain-text modes.
  int mode = ui.searchMode->itemData( ui.searchMode->currentIndex() ).toInt();
  if( mode == WholeWords || mode == PlainText )
  {
    ui.checkBoxIgnoreWordOrder->setChecked( ignoreWordsOrder );
    ui.checkBoxIgnoreWordOrder->setEnabled( true );
  }
  else
  {
    ui.checkBoxIgnoreWordOrder->setChecked( false );
    ui.checkBoxIgnoreWordOrder->setEnabled( false );
  }

  ui.checkBoxIgnoreDiacritics->setChecked( ignoreDiacritics );

  ui.matchCase->setChecked( cfg.preferences.fts.matchCase );

  setLimitsUsing();

  connect( ui.checkBoxDistanceBetweenWords, SIGNAL( stateChanged( int ) ),
           this, SLOT( setLimitsUsing() ) );
  connect( ui.checkBoxArticlesPerDictionary, SIGNAL( stateChanged( int ) ),
           this, SLOT( setLimitsUsing() ) );
  connect( ui.searchMode, SIGNAL( currentIndexChanged( int ) ),
           this, SLOT( setLimitsUsing() ) );
  connect( ui.checkBoxIgnoreWordOrder, SIGNAL( stateChanged( int ) ),
           this, SLOT( ignoreWordsOrderClicked() ) );
  connect( ui.checkBoxIgnoreDiacritics, SIGNAL( stateChanged( int ) ),
           this, SLOT( ignoreDiacriticsClicked() ) );

  model = new HeadwordsListModel( this, results, activeDicts );
  ui.headwordsView->setModel( model );

  ui.articlesFoundLabel->setText( tr( "Articles found: " ) + "0" );

  connect( ui.headwordsView, SIGNAL( clicked( QModelIndex ) ),
           this, SLOT( itemClicked( QModelIndex ) ) );

  connect( this, SIGNAL( finished( int ) ), this, SLOT( saveData() ) );

  connect( ui.OKButton, SIGNAL( clicked() ), this, SLOT( accept() ) );
  connect( ui.cancelButton, SIGNAL( clicked() ), this, SLOT( reject() ) );

  connect( ui.helpButton, SIGNAL( clicked() ),
           this, SLOT( helpRequested() ) );

  helpAction.setShortcut( QKeySequence( "F1" ) );
  helpAction.setShortcutContext( Qt::WidgetWithChildrenShortcut );

  connect( &helpAction, SIGNAL( triggered() ),
           this, SLOT( helpRequested() ) );

  addAction( &helpAction );

  // Return/Enter in the result list is handled by eventFilter().
  ui.headwordsView->installEventFilter( this );

  // operator new never yields a null pointer, so the delegate can be
  // installed unconditionally.
  delegate = new WordListItemDelegate( ui.headwordsView->itemDelegate() );
  ui.headwordsView->setItemDelegate( delegate );

  // The parent is only known to be a QWidget here: use a checked cast (as
  // helpRequested() does) instead of dereferencing an unchecked static_cast.
  if( MainWindow * mainWindow = qobject_cast< MainWindow * >( parent ) )
    ui.searchLine->setText( mainWindow->getTranslateLineText() );

  ui.searchLine->selectAll();
}
/// Schedules the item delegate created by the constructor for deletion.
FullTextSearchDialog::~FullTextSearchDialog()
{
  if( !delegate )
    return;

  delegate->deleteLater();
}
/// Cancels every search request that has not finished yet, then keeps
/// processing application events until the request list is empty (requests
/// are removed from the list by searchReqFinished()).
void FullTextSearchDialog::stopSearch()
{
  if( searchReqs.empty() )
    return;

  for( auto & request : searchReqs )
  {
    if( !request->isFinished() )
      request->cancel();
  }

  while( !searchReqs.empty() )
    QApplication::processEvents();
}
/// Refreshes the three counters shown in the dialog: the number of active
/// dictionaries, how many of them already have a full-text index, and how
/// many still lack one.
void FullTextSearchDialog::showDictNumbers()
{
  ui.totalDicts->setText( QString::number( activeDicts.size() ) );

  unsigned indexed = 0;
  unsigned pending = 0;

  for( auto const & dict : activeDicts )
  {
    if( dict->haveFTSIndex() )
      ++indexed;
    else
      ++pending;
  }

  ui.readyDicts->setText( QString::number( indexed ) );
  ui.toIndexDicts->setText( QString::number( pending ) );
}
/// Copies the current dialog state (search mode, options, limits and window
/// geometry) back into the FTS section of the configuration object.
void FullTextSearchDialog::saveData()
{
  cfg.preferences.fts.searchMode = ui.searchMode->currentIndex();
  cfg.preferences.fts.matchCase = ui.matchCase->isChecked();

  // Read the numeric limits through value(), exactly as accept() does.
  // Parsing the displayed text would store 0 whenever the editor content is
  // empty or otherwise not a plain number.
  cfg.preferences.fts.maxArticlesPerDictionary = ui.articlesPerDictionary->value();
  cfg.preferences.fts.maxDistanceBetweenWords = ui.distanceBetweenWords->value();

  cfg.preferences.fts.useMaxDistanceBetweenWords = ui.checkBoxDistanceBetweenWords->isChecked();
  cfg.preferences.fts.useMaxArticlesPerDictionary = ui.checkBoxArticlesPerDictionary->isChecked();

  // These two come from the member copies maintained by the click slots.
  cfg.preferences.fts.ignoreWordsOrder = ignoreWordsOrder;
  cfg.preferences.fts.ignoreDiacritics = ignoreDiacritics;

  cfg.preferences.fts.dialogGeometry = saveGeometry();
}
/// Updates the "Now indexing" label with the given dictionary name (or the
/// translated "None" when the name is empty) and refreshes the counters.
void FullTextSearchDialog::setNewIndexingName( QString name )
{
  QString const shownName = name.isEmpty() ? tr( "None" ) : name;
  ui.nowIndexingLabel->setText( tr( "Now indexing: " ) + shownName );

  showDictNumbers();
}
/// Enables or disables the limit controls to match the selected search mode
/// and the state of their check boxes.
///
/// The word-distance and word-order options are available only in the
/// WholeWords and PlainText modes; in every other mode they are disabled and
/// the word-order box is unchecked.
void FullTextSearchDialog::setLimitsUsing()
{
  int const mode = ui.searchMode->itemData( ui.searchMode->currentIndex() ).toInt();
  bool const wordBasedMode = ( mode == WholeWords ) || ( mode == PlainText );

  if( !wordBasedMode )
  {
    ui.checkBoxIgnoreWordOrder->setEnabled( false );
    ui.checkBoxIgnoreWordOrder->setChecked( false );
    ui.checkBoxDistanceBetweenWords->setEnabled( false );
    ui.distanceBetweenWords->setEnabled( false );
  }
  else
  {
    ui.checkBoxDistanceBetweenWords->setEnabled( true );
    ui.distanceBetweenWords->setEnabled( ui.checkBoxDistanceBetweenWords->isChecked() );
    ui.checkBoxIgnoreWordOrder->setChecked( ignoreWordsOrder );
    ui.checkBoxIgnoreWordOrder->setEnabled( true );
  }

  // The per-dictionary limit follows its own check box in every mode.
  ui.articlesPerDictionary->setEnabled( ui.checkBoxArticlesPerDictionary->isChecked() );
}
/// Mirrors the state of the "ignore word order" check box into the member
/// flag used by the search and by saveData().
void FullTextSearchDialog::ignoreWordsOrderClicked()
{
  bool const checked = ui.checkBoxIgnoreWordOrder->isChecked();
  ignoreWordsOrder = checked;
}
/// Mirrors the state of the "ignore diacritics" check box into the member
/// flag used by the search and by saveData().
void FullTextSearchDialog::ignoreDiacriticsClicked()
{
  bool const checked = ui.checkBoxIgnoreDiacritics->isChecked();
  ignoreDiacritics = checked;
}
void FullTextSearchDialog::accept()
{
QStringList list1, list2;
int mode = ui.searchMode->itemData( ui.searchMode->currentIndex() ).toInt();
int maxResultsPerDict = ui.checkBoxArticlesPerDictionary->isChecked() ?
ui.articlesPerDictionary->value() : -1;
int distanceBetweenWords = ui.checkBoxDistanceBetweenWords->isChecked() ?
ui.distanceBetweenWords->value() : -1;
model->clear();
matchedCount=0;
ui.articlesFoundLabel->setText( tr( "Articles found: " ) + QString::number( results.size() ) );
bool hasCJK;
if( !FtsHelpers::parseSearchString( ui.searchLine->text(), list1, list2,
searchRegExp, mode,
ui.matchCase->isChecked(),
distanceBetweenWords,
hasCJK, ignoreWordsOrder ) )
{
QMessageBox message( QMessageBox::Warning,
"GoldenDict",
tr( "The search line must contains at least one word containing " )
+ QString::number( MinimumWordSize ) + tr( " or more symbols" ),
QMessageBox::Ok,
this );
message.exec();
return;
}
if( activeDicts.empty() )
{
QMessageBox message( QMessageBox::Warning,
"GoldenDict",
tr( "No dictionaries for full-text search" ),
QMessageBox::Ok,
this );
message.exec();
return;
}
ui.OKButton->setEnabled( false );
ui.searchProgressBar->show();
// Make search requests
for( unsigned x = 0; x < activeDicts.size(); ++x )
{
if( !activeDicts[ x ] ->haveFTSIndex())
{
continue;
}
sptr< Dictionary::DataRequest > req = activeDicts[ x ]->getSearchResults(
ui.searchLine->text(),
mode,
ui.matchCase->isChecked(),
distanceBetweenWords,
maxResultsPerDict,
ignoreWordsOrder,
ignoreDiacritics
);
connect( req.get(), SIGNAL( finished() ),
this, SLOT( searchReqFinished() ), Qt::QueuedConnection );
connect( req.get(), SIGNAL( matchCount(int) ),
this, SLOT( matchCount(int) ), Qt::QueuedConnection );
searchReqs.push_back( req );
}
searchReqFinished(); // Handle any ones which have already finished
}
/// Collects the results of all search requests that have finished so far.
///
/// Each finished request is removed from searchReqs. If it carries data, the
/// data holds a pointer to a heap-allocated headword list; that list is taken
/// over, freed, sorted and merged into the combined result. Once nothing else
/// is finished, the merged headwords are added to the model. When no requests
/// remain, the progress bar is hidden, the OK button is re-enabled and a beep
/// is played.
void FullTextSearchDialog::searchReqFinished()
{
  QList< FtsHeadword > allHeadwords;

  while( !searchReqs.empty() )
  {
    // Always restart from the front: look for the first finished request.
    std::list< sptr< Dictionary::DataRequest > >::iterator it = searchReqs.begin();
    while( it != searchReqs.end() && !( *it )->isFinished() )
      ++it;

    if( it == searchReqs.end() )
      break; // the remaining requests are still running

    GD_DPRINTF( "one finished.\n" );

    // Report a failed request by its own error text. Previously such a
    // request only surfaced indirectly, through the exception raised by the
    // data access below.
    QString errorString = ( *it )->getErrorString();
    if( !errorString.isEmpty() )
      gdWarning( "Full-text search request failed: %s\n", errorString.toUtf8().data() );

    // dataSize() may be negative; compare it as a signed value first so that
    // a negative size is not converted into a huge unsigned one.
    QList< FtsHeadword > * headwords = nullptr;
    auto const available = ( *it )->dataSize();
    if( available >= 0 && static_cast< size_t >( available ) >= sizeof( headwords ) )
    {
      try
      {
        // The request's data is the pointer itself, not the list contents.
        ( *it )->getDataSlice( 0, sizeof( headwords ), &headwords );

        if( headwords )
        {
          QList< FtsHeadword > hws;
          hws.swap( *headwords );
          delete headwords; // ownership was handed over with the pointer

          std::sort( hws.begin(), hws.end() );
          addSortedHeadwords( allHeadwords, hws );
        }
      }
      catch( std::exception const & e )
      {
        gdWarning( "getDataSlice error: %s\n", e.what() );
      }
    }

    GD_DPRINTF( "erasing..\n" );
    searchReqs.erase( it );
    GD_DPRINTF( "erase done..\n" );
  }

  if( !allHeadwords.isEmpty() )
  {
    model->addResults( QModelIndex(), allHeadwords );

    // Only overwrite the label when the list exceeds the reported match count.
    if( results.size() > matchedCount )
      ui.articlesFoundLabel->setText( tr( "Articles found: " ) + QString::number( results.size() ) );
  }

  if( searchReqs.empty() )
  {
    // Search is complete.
    ui.searchProgressBar->hide();
    ui.OKButton->setEnabled( true );
    QApplication::beep();
  }
}
void FullTextSearchDialog::matchCount(int _matchCount){
matchedCount+=_matchCount;
ui.articlesFoundLabel->setText( tr( "Articles found: " )
+ QString::number( matchedCount ) );
}
/// Cancel handler: while a search is running it only stops the search;
/// otherwise it saves the dialog state and emits closeDialog().
void FullTextSearchDialog::reject()
{
  if( !searchReqs.empty() )
  {
    stopSearch();
    return;
  }

  saveData();
  emit closeDialog();
}
void FullTextSearchDialog::itemClicked( const QModelIndex & idx )
{
if( idx.isValid() && idx.row() < results.size() )
{
QString headword = results[ idx.row() ].headword;
QRegExp reg;
#ifdef USE_XAPIAN
auto searchText = ui.searchLine->text();
searchText.replace( RX::Ftx::tokenBoundary, " " );
auto it = RX::Ftx::token.globalMatch(searchText);
QString firstAvailbeItem;
while( it.hasNext() )
{
QRegularExpressionMatch match = it.next();
auto p = match.captured();
if( p.startsWith( '-' ) )
continue;
//the searched text should be like "term".remove enclosed double quotation marks.
if(p.startsWith("\"")){
p.remove("\"");
}
firstAvailbeItem = p;
break;
}
if( !firstAvailbeItem.isEmpty() )
{
reg = QRegExp( firstAvailbeItem, Qt::CaseInsensitive, QRegExp::RegExp2 );
reg.setMinimal( true );
}
#else
if( !results[ idx.row() ].foundHiliteRegExps.isEmpty() )
{
reg = QRegExp( results[ idx.row() ].foundHiliteRegExps.join( "|"),
results[ idx.row() ].matchCase ? Qt::CaseSensitive : Qt::CaseInsensitive,
QRegExp::RegExp2 );
reg.setMinimal( true );
}
else
reg = searchRegExp;
#endif
emit showTranslationFor( headword, results[ idx.row() ].dictIDs, reg, ignoreDiacritics );
}
}
void FullTextSearchDialog::updateDictionaries()
{
activeDicts.clear();
// Find the given group
Instances::Group const * activeGroup = 0;
for( unsigned x = 0; x < groups.size(); ++x )
if ( groups[ x ].id == group )
{
activeGroup = &groups[ x ];
break;
}
// If we've found a group, use its dictionaries; otherwise, use the global
// heap.
std::vector< sptr< Dictionary::Class > > const & groupDicts =
activeGroup ? activeGroup->dictionaries : dictionaries;
// Exclude muted dictionaries
Config::Group const * grp = cfg.getGroup( group );
Config::MutedDictionaries const * mutedDicts;
if( group == Instances::Group::AllGroupId )
mutedDicts = &cfg.mutedDictionaries;
else
mutedDicts = grp ? &grp->mutedDictionaries : 0;
if( mutedDicts && !mutedDicts->isEmpty() )
{
activeDicts.reserve( groupDicts.size() );
for( unsigned x = 0; x < groupDicts.size(); ++x )
if ( groupDicts[ x ]->canFTS()
&& !mutedDicts->contains( QString::fromStdString( groupDicts[ x ]->getId() ) )
)
activeDicts.push_back( groupDicts[ x ] );
}
else
{
for( unsigned x = 0; x < groupDicts.size(); ++x )
if ( groupDicts[ x ]->canFTS() )
activeDicts.push_back( groupDicts[ x ] );
}
showDictNumbers();
}
/// Event filter installed on the result list: pressing Return or Enter there
/// behaves like clicking the current row, and the event is consumed. All
/// other events go to the base class filter.
bool FullTextSearchDialog::eventFilter( QObject * obj, QEvent * ev )
{
  bool const keyPressOnList = ( obj == ui.headwordsView ) && ( ev->type() == QEvent::KeyPress );

  if( keyPressOnList )
  {
    QKeyEvent * keyEvent = static_cast< QKeyEvent * >( ev );

    switch( keyEvent->key() )
    {
      case Qt::Key_Return:
      case Qt::Key_Enter:
        itemClicked( ui.headwordsView->currentIndex() );
        return true;

      default:
        break;
    }
  }

  return QDialog::eventFilter( obj, ev );
}
void FullTextSearchDialog::helpRequested()
{
MainWindow * mainWindow = qobject_cast< MainWindow * >( parentWidget() );
if( mainWindow )
mainWindow->showGDHelpForID( "Full-text search" );
}
/// HeadwordsListModel
/// Number of rows in the model: one per stored headword. The parent index is
/// ignored.
int HeadwordsListModel::rowCount( QModelIndex const & ) const
{
  int const rows = headwords.size();
  return rows;
}
/// Returns the data of one result row.
///
/// - Qt::DisplayRole / Qt::EditRole: the headword text.
/// - Qt::ToolTipRole: the names of the dictionaries that contain the
///   headword, separated by "<br>".
///
/// An empty QVariant is returned for any other role, for rows outside the
/// list and for entries with an empty headword.
QVariant HeadwordsListModel::data( QModelIndex const & index, int role ) const
{
  // Reject rows on both sides of the valid range; a row past the end would
  // otherwise index the list out of bounds.
  if( index.row() < 0 || index.row() >= headwords.size() )
    return QVariant();

  FtsHeadword const & head = headwords[ index.row() ];

  if( head.headword.isEmpty() )
    return QVariant();

  switch( role )
  {
    case Qt::ToolTipRole:
    {
      QString tt;
      for( int x = 0; x < head.dictIDs.size(); x++ )
      {
        if( x != 0 )
          tt += "<br>";

        // IDs that match no known dictionary contribute no name.
        int n = getDictIndex( head.dictIDs[ x ] );
        if( n != -1 )
          tt += QString::fromUtf8( dictionaries[ n ]->getName().c_str() ) ;
      }
      return tt;
    }

    case Qt::DisplayRole:
    case Qt::EditRole:
      return head.headword;

    default:
      break;
  }

  return QVariant();
}
/// Merges the given headwords into the model's list inside a model reset and
/// then emits contentChanged(). The parent index is not used.
void HeadwordsListModel::addResults(const QModelIndex & parent, QList< FtsHeadword > const & hws )
{
  Q_UNUSED( parent );

  beginResetModel();
  {
    addSortedHeadwords( headwords, hws );
  }
  endResetModel();

  emit contentChanged();
}
/// Removes all headwords inside a model reset and emits contentChanged().
/// @return always true
bool HeadwordsListModel::clear()
{
  beginResetModel();
  {
    headwords.clear();
  }
  endResetModel();

  emit contentChanged();

  return true;
}
/// Looks up a dictionary by its ID.
/// @return the position of the first dictionary whose ID equals the UTF-8
///         form of id, or -1 when there is none
int HeadwordsListModel::getDictIndex( QString const & id ) const
{
  std::string const wanted( id.toUtf8().data() );

  unsigned position = 0;
  for( auto const & dict : dictionaries )
  {
    if( dict->getId() == wanted )
      return position;

    ++position;
  }

  return -1;
}
/// Returns str without its leading and trailing quote characters (both the
/// double quote and the apostrophe count as quotes). Quotes inside the text
/// are kept. An empty string, or one made of quotes only, yields an empty
/// result.
QString FtsHeadword::trimQuotes( QString const & str ) const
{
  int const length = str.size();

  // Skip leading quotes. The bounds check keeps the scan inside the string
  // for empty and all-quote input.
  int first = 0;
  while( first < length && ( str[ first ] == '\"' || str[ first ] == '\'' ) )
    ++first;

  // Step back over trailing quotes, never crossing the leading run.
  int last = length;
  while( last > first && ( str[ last - 1 ] == '\"' || str[ last - 1 ] == '\'' ) )
    --last;

  return str.mid( first, last - first );
}
/// Ordering used for sorting headwords.
///
/// Headwords are compared locale-aware with surrounding quotes stripped. When
/// the stripped forms are equal and at least one side actually had quotes,
/// the original headwords decide the order; otherwise neither one is less
/// than the other.
bool FtsHeadword::operator <( FtsHeadword const & other ) const
{
  QString const lhs = trimQuotes( headword );
  QString const rhs = trimQuotes( other.headword );

  int const order = lhs.localeAwareCompare( rhs );
  if( order != 0 )
    return order < 0;

  // Headwords without quotes are equal
  bool const quotesWereStripped = lhs.size() != headword.size()
                                  || rhs.size() != other.headword.size();

  return quotesWereStripped && headword.localeAwareCompare( other.headword ) < 0;
}
} // namespace FTS