mirror of
https://github.com/xiaoyifang/goldendict-ng.git
synced 2024-11-23 20:14:05 +00:00
epwing: ignore reference link processing when parsing the dictionary
This commit is contained in:
parent
22f9d5d832
commit
ad087b1031
24
epwing.cc
24
epwing.cc
|
@ -203,6 +203,7 @@ private:
|
|||
friend class EpwingWordSearchRequest;
|
||||
string epwing_previous_button(int& articleOffset, int& articlePage);
|
||||
string epwing_next_button(int& articleOffset, int& articlePage);
|
||||
bool readHeadword( EB_Position & pos, QString & headword );
|
||||
};
|
||||
|
||||
|
||||
|
@ -578,8 +579,8 @@ void EpwingArticleRequest::run()
|
|||
multimap< wstring, pair< string, string > > mainArticles, alternateArticles;
|
||||
|
||||
set< quint32 > articlesIncluded; // Some synonims make it that the articles
|
||||
// appear several times. We combat this
|
||||
// by only allowing them to appear once.
|
||||
// appear several times. We combat this
|
||||
// by only allowing them to appear once.
|
||||
|
||||
wstring wordCaseFolded = Folding::applySimpleCaseOnly( word );
|
||||
if( ignoreDiacritics )
|
||||
|
@ -1043,6 +1044,20 @@ sptr< Dictionary::WordSearchRequest > EpwingDictionary::stemmedMatch(
|
|||
return std::make_shared<EpwingWordSearchRequest>( *this, str, minLength, (int)maxSuffixVariation,
|
||||
false, maxResults );
|
||||
}
|
||||
// Reads the headword located at `pos` into `headword` and reports whether it is usable.
//
// Steps, all performed while a Mutex::Lock on eBook.getLibMutex() is alive
// (the lock object is scoped to the try block):
//   1. eBook.readHeadword( pos, headword, true ) fills `headword`; going by the
//      EpwingBook::readHeadword declaration, the third argument is `text_only`.
//   2. eBook.fixHeadword( headword ) normalizes the string in place.
//   3. The result of eBook.isHeadwordCorrect( headword ) is returned.
//
// Returns false if any of the calls above throws a std::exception; the
// exception is swallowed and `headword` is left in whatever state it had
// when the exception was thrown.
// NOTE(review): the exception object `e` is never used — confirm this does not
// trigger an unused-variable warning in the project's build configuration.
bool Epwing::EpwingDictionary::readHeadword( EB_Position & pos, QString & headword )
|
||||
{
|
||||
try
|
||||
{
|
||||
Mutex::Lock _( eBook.getLibMutex() );
|
||||
eBook.readHeadword( pos,headword, true);
|
||||
eBook.fixHeadword( headword );
|
||||
return eBook.isHeadwordCorrect( headword ) ;
|
||||
}
|
||||
catch( std::exception & e )
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
|
@ -1258,11 +1273,6 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
|
|||
break;
|
||||
}
|
||||
|
||||
while( dict.processRef( head ) )
|
||||
{
|
||||
addWordToChunks( head, chunks, indexedWords, wordCount, articleCount );
|
||||
}
|
||||
|
||||
dict.clearBuffers();
|
||||
|
||||
// Finish with the chunks
|
||||
|
|
|
@ -1002,8 +1002,6 @@ bool EpwingBook::getNextHeadword( EpwingHeadword & head )
|
|||
|
||||
indexHeadwordsPosition = pos;
|
||||
|
||||
|
||||
|
||||
head.page = pos.page;
|
||||
head.offset = pos.offset;
|
||||
|
||||
|
@ -1033,54 +1031,6 @@ bool EpwingBook::getNextHeadword( EpwingHeadword & head )
|
|||
return true;
|
||||
}
|
||||
|
||||
// Drains LinksQueue (taking entries from the back) until one of the queued
// link positions yields a reference headword, or the queue becomes empty.
//
// For every popped (page, offset) pair:
//   - the headword at that position is read with text_only = true;
//   - the entry is skipped when the read fails, when the headword is empty,
//     or when it matches the pattern "#(v|n)\d";
//   - otherwise the headword is fixed and head.page / head.offset are set to
//     the link position;
//   - only if that position is already present in allHeadwordPositions is the
//     headword replaced by the synthetic form "r<page>at<offset>",
//     getReferencesFromText() called for the position (any std::exception it
//     throws is ignored), and true returned.
//
// Returns false once the queue is exhausted without producing such an entry.
// Note that `head` may have been partially overwritten (headword, page,
// offset) even when false is returned.
// NOTE(review): getReferencesFromText() presumably pushes further links onto
// LinksQueue — confirm against its definition.
bool EpwingBook::processRef( EpwingHeadword & head)
|
||||
{
|
||||
EB_Position pos;
|
||||
|
||||
QRegularExpression badLinks( "#(v|n)\\d", QRegularExpression::UseUnicodePropertiesOption );
|
||||
while( !LinksQueue.isEmpty() )
|
||||
{
|
||||
EWPos epos = LinksQueue.last();
|
||||
LinksQueue.pop_back();
|
||||
|
||||
pos.page = epos.first;
|
||||
pos.offset = epos.second;
|
||||
|
||||
// EPWING books use reference links for navigation; the headword read at such a link target is usually not meaningful by itself.
|
||||
if( readHeadword( pos, head.headword, true ) )
|
||||
{
|
||||
if( head.headword.isEmpty() || head.headword.contains( badLinks ) )
|
||||
continue;
|
||||
|
||||
fixHeadword( head.headword );
|
||||
|
||||
head.page = pos.page;
|
||||
head.offset = pos.offset;
|
||||
auto key = ( (uint64_t)pos.page ) << 32 | ( pos.offset ); // page in the upper 32 bits, OR-ed with the offset
|
||||
// Only reference targets whose position is already among the known headword positions are added, as an extra headword of the form rXXXXatXXXX in the headword list.
|
||||
// This makes it easier for loadarticle to judge whether a reference link is a real one.
|
||||
if( allHeadwordPositions.contains( key ) )
|
||||
{
|
||||
// Use a synthetic headword for the reference, to avoid colliding with the headword of another entry.
|
||||
//if(!allHeadwordPositions.contains(key))
|
||||
head.headword = QString( "r%1at%2" ).arg( pos.page ).arg( pos.offset );
|
||||
|
||||
//allRefPositions[ key ] = true;
|
||||
|
||||
try
|
||||
{
|
||||
getReferencesFromText( pos.page, pos.offset);
|
||||
}
|
||||
catch( std::exception & )
|
||||
{
|
||||
// Deliberately ignored: a failure here does not stop the reference headword from being reported.
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool EpwingBook::readHeadword( EB_Position const& pos,
|
||||
QString & headword,
|
||||
bool text_only )
|
||||
|
@ -1199,6 +1149,13 @@ void EpwingBook::fixHeadword( QString & headword )
|
|||
QRegularExpression leadingNumAndSpace( R"(^[\d\s]+\b)" );
|
||||
fixed.remove( leadingNumAndSpace );
|
||||
|
||||
auto parts = fixed.split( ' ', Qt::SkipEmptyParts );
|
||||
if( parts.size() > 2 ) {
|
||||
// Keep only the first two parts to avoid duplicate headwords.
|
||||
headword = QString( "%1 %2" ).arg( parts[ 0 ], parts[ 1 ] );
|
||||
return;
|
||||
}
|
||||
|
||||
headword = fixed;
|
||||
}
|
||||
|
||||
|
|
|
@ -186,8 +186,6 @@ public:
|
|||
// Find next headword and article position
|
||||
bool getNextHeadword( EpwingHeadword & head );
|
||||
|
||||
bool processRef( EpwingHeadword & head );
|
||||
|
||||
bool readHeadword( EB_Position const & pos,
|
||||
QString & headword,
|
||||
bool text_only );
|
||||
|
|
Loading…
Reference in a new issue