Merge pull request #1040 from xiaoyifang/fix/fts-compact

opt: full text index compact
This commit is contained in:
xiaoyifang 2023-08-05 12:52:40 +08:00 committed by GitHub
commit 91f9e9c51a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 26 additions and 48 deletions

View file

@ -68,4 +68,23 @@ std::string basename( std::string const & str )
return std::string( str, x + 1 );
}
/// Recursively deletes @p directory together with all of its contents.
///
/// The operation is best-effort: entries that cannot be removed are skipped and
/// no error is reported to the caller. An empty path is ignored.
///
/// @param directory Path of the directory to delete (absolute or relative).
void removeDirectory( QString const & directory )
{
  // QDir( "" ) denotes the current working directory; refuse to wipe it because
  // a caller accidentally passed an empty path.
  if ( directory.isEmpty() )
    return;

  // removeRecursively() operates on the QDir's own path, so it works for relative
  // paths as well (QDir::rmdir( name ) would resolve a relative name *inside* the
  // directory and fail to remove the directory itself). It keeps going when an
  // individual entry cannot be deleted, matching the previous best-effort behaviour.
  QDir( directory ).removeRecursively();
}
/// Convenience overload for callers holding a std::string path: converts the
/// path to a QString and forwards to the QString overload.
void removeDirectory( string const & directory )
{
  QString const path = QString::fromStdString( directory );
  removeDirectory( path );
}
} // namespace Utils::Fs

View file

@ -332,7 +332,9 @@ char separator();
/// Returns the name part of the given filename.
string basename( string const & );
/// Recursively deletes the given directory and everything inside it.
void removeDirectory( QString const & directory );
/// Same as above, taking the path as a std::string.
void removeDirectory( string const & directory );
} // namespace Fs
} // namespace Utils

View file

@ -14,10 +14,6 @@
#include <vector>
#include <string>
#include <QRegularExpression>
using std::vector;
using std::string;
@ -49,49 +45,6 @@ bool ftsIndexIsOldOrBad( BtreeIndexing::BtreeDictionary * dict )
}
}
/// Builds the list of index terms from @p list, splitting CJK text into
/// single characters.
///
/// - A word that contains no CJK character is kept whole, but only if it
///   matches @p wordRegExp.
/// - For a word that contains CJK characters, every CJK character becomes its
///   own term (a high/low surrogate pair is kept together as one term); the
///   remaining non-CJK characters of that word are discarded.
///
/// @param indexWords Output; overwritten with the de-duplicated plain words
///                   followed by the de-duplicated CJK characters.
/// @param wordRegExp Filter applied to the non-CJK words.
/// @param list       Input words.
void tokenizeCJK( QStringList & indexWords, QRegularExpression wordRegExp, QStringList list )
{
  QStringList wordList, hieroglyphList;

  for ( auto const & word : list ) {
    // Set as soon as one CJK character is seen in the word.
    bool parsed = false;

    for ( int x = 0; x < word.size(); x++ ) {
      if ( !Utils::isCJKChar( word.at( x ).unicode() ) )
        continue;

      parsed = true;
      QString hieroglyph( word.at( x ) );

      // Keep a surrogate pair together. The bounds check matters: a word ending
      // in an unpaired high surrogate must not read past the end of the string.
      if ( word.at( x ).isHighSurrogate() && x + 1 < word.size() && word.at( x + 1 ).isLowSurrogate() )
        hieroglyph.append( word.at( ++x ) );

      hieroglyphList.append( hieroglyph );
    }

    // If the word doesn't contain any CJK symbols, put it in the list as is.
    if ( !parsed )
      wordList.append( word );
  }

  indexWords = wordList.filter( wordRegExp );
  indexWords.removeDuplicates();

  hieroglyphList.removeDuplicates();
  indexWords += hieroglyphList;
}
/// Returns true if at least one UTF-16 code unit of @p str is classified as a
/// CJK character by Utils::isCJKChar(), false otherwise (including for an
/// empty string).
bool containCJK( QString const & str )
{
  for ( QChar const ch : str ) {
    if ( Utils::isCJKChar( ch.unicode() ) )
      return true;
  }
  return false;
}
void makeFTSIndex( BtreeIndexing::BtreeDictionary * dict, QAtomicInt & isCancelled )
{
QMutexLocker _( &dict->getFtsMutex() );
@ -105,7 +58,7 @@ void makeFTSIndex( BtreeIndexing::BtreeDictionary * dict, QAtomicInt & isCancell
throw exUserAbort();
// Open the database for update, creating a new database if necessary.
Xapian::WritableDatabase db( dict->ftsIndexName(), Xapian::DB_CREATE_OR_OPEN );
Xapian::WritableDatabase db( dict->ftsIndexName() + "_temp", Xapian::DB_CREATE_OR_OPEN );
Xapian::TermGenerator indexer;
// Xapian::Stem stemmer("english");
@ -206,6 +159,10 @@ void makeFTSIndex( BtreeIndexing::BtreeDictionary * dict, QAtomicInt & isCancell
offsets.clear();
db.commit();
db.compact( dict->ftsIndexName() );
Utils::Fs::removeDirectory( dict->ftsIndexName() + "_temp" );
}
catch ( Xapian::Error & e ) {
qWarning() << "create xapian index:" << QString::fromStdString( e.get_description() );