diff --git a/epwing.cc b/epwing.cc index a87d9f08..9aa0eb68 100644 --- a/epwing.cc +++ b/epwing.cc @@ -203,6 +203,7 @@ private: friend class EpwingWordSearchRequest; string epwing_previous_button(int& articleOffset, int& articlePage); string epwing_next_button(int& articleOffset, int& articlePage); + bool readHeadword( EB_Position & pos, QString & headword ); }; @@ -578,8 +579,8 @@ void EpwingArticleRequest::run() multimap< wstring, pair< string, string > > mainArticles, alternateArticles; set< quint32 > articlesIncluded; // Some synonims make it that the articles - // appear several times. We combat this - // by only allowing them to appear once. + // appear several times. We combat this + // by only allowing them to appear once. wstring wordCaseFolded = Folding::applySimpleCaseOnly( word ); if( ignoreDiacritics ) @@ -1043,6 +1044,20 @@ sptr< Dictionary::WordSearchRequest > EpwingDictionary::stemmedMatch( return std::make_shared( *this, str, minLength, (int)maxSuffixVariation, false, maxResults ); } +/* Reads the headword at pos while holding a lock on eBook.getLibMutex(), normalizes it with eBook.fixHeadword() and returns true only if the read succeeded and eBook.isHeadwordCorrect() accepts the result; returns false if a std::exception is thrown. */ bool Epwing::EpwingDictionary::readHeadword( EB_Position & pos, QString & headword ) +{ + try + { + Mutex::Lock _( eBook.getLibMutex() ); + bool const readOk = eBook.readHeadword( pos, headword, true ); /* remember a failed read so that whatever headword held before the call cannot pass validation */ + eBook.fixHeadword( headword ); + return readOk && eBook.isHeadwordCorrect( headword ); + } + catch( std::exception & ) + { + return false; + } +} } // anonymous namespace @@ -1258,11 +1273,6 @@ vector< sptr< Dictionary::Class > > makeDictionaries( break; } - while( dict.processRef( head ) ) - { - addWordToChunks( head, chunks, indexedWords, wordCount, articleCount ); - } - dict.clearBuffers(); // Finish with the chunks diff --git a/epwing_book.cc b/epwing_book.cc index c88e482d..0ef2ef20 100644 --- a/epwing_book.cc +++ b/epwing_book.cc @@ -1002,8 +1002,6 @@ bool EpwingBook::getNextHeadword( EpwingHeadword & head ) indexHeadwordsPosition = pos; - - head.page = pos.page; head.offset = pos.offset; @@ -1033,54 +1031,6 @@ bool EpwingBook::getNextHeadword( EpwingHeadword & head ) return true; } -bool 
EpwingBook::processRef( EpwingHeadword & head) -{ - EB_Position pos; - - QRegularExpression badLinks( "#(v|n)\\d", QRegularExpression::UseUnicodePropertiesOption ); - while( !LinksQueue.isEmpty() ) - { - EWPos epos = LinksQueue.last(); - LinksQueue.pop_back(); - - pos.page = epos.first; - pos.offset = epos.second; - - // epwing ebook use ref link to navigate , the headword(at such position) usually has no meaningful point. - if( readHeadword( pos, head.headword, true ) ) - { - if( head.headword.isEmpty() || head.headword.contains( badLinks ) ) - continue; - - fixHeadword( head.headword ); - - head.page = pos.page; - head.offset = pos.offset; - auto key = ( (uint64_t)pos.page ) << 32 | ( pos.offset ); - //this only add the existed reference point which has already in the headwords as another headword(rxxxxAtxxxx) in the headword list. - //this will make the loadarticle's real reference link judgement easier. - if( allHeadwordPositions.contains( key ) ) - { - // fixed the reference headword ,to avoid the headword collision with other entry . 
- //if(!allHeadwordPositions.contains(key)) - head.headword = QString( "r%1at%2" ).arg( pos.page ).arg( pos.offset ); - - //allRefPositions[ key ] = true; - - try - { - getReferencesFromText( pos.page, pos.offset); - } - catch( std::exception & ) - { - } - return true; - } - } - } - return false; -} - bool EpwingBook::readHeadword( EB_Position const& pos, QString & headword, bool text_only ) @@ -1199,6 +1149,13 @@ void EpwingBook::fixHeadword( QString & headword ) QRegularExpression leadingNumAndSpace( R"(^[\d\s]+\b)" ); fixed.remove( leadingNumAndSpace ); + auto parts = fixed.split( ' ', Qt::SkipEmptyParts ); + if( parts.size() > 2 ) { + // more than two space-separated parts: keep only the first two, joined by a single space (intended to avoid duplicated headwords) + headword = QString( "%1 %2" ).arg( parts[ 0 ], parts[ 1 ] ); + return; + } + headword = fixed; } diff --git a/epwing_book.hh b/epwing_book.hh index e0a8054d..8149e8e5 100644 --- a/epwing_book.hh +++ b/epwing_book.hh @@ -186,8 +186,6 @@ public: // Find next headword and article position bool getNextHeadword( EpwingHeadword & head ); - bool processRef( EpwingHeadword & head ); - bool readHeadword( EB_Position const & pos, QString & headword, bool text_only );