From 467b2d17b3d3fb0635022fc00a09216a9efca58e Mon Sep 17 00:00:00 2001 From: Xiao YiFang Date: Mon, 23 May 2022 00:13:24 +0800 Subject: [PATCH 1/2] fix: duplicated entries in epwing dictionary --- epwing_book.cc | 42 +++++++++++++++++++++++++++++++++--------- epwing_book.hh | 2 +- 2 files changed, 34 insertions(+), 10 deletions(-) diff --git a/epwing_book.cc b/epwing_book.cc index bec5995d..111f031a 100644 --- a/epwing_book.cc +++ b/epwing_book.cc @@ -850,7 +850,7 @@ void EpwingBook::getFirstHeadword( EpwingHeadword & head ) fixHeadword( head.headword ); EWPos epos( pos.page, pos.offset ); - allHeadwordPositions[ head.headword ] = epos; + allHeadwordPositions[ head.headword ] << epos; } bool EpwingBook::getNextHeadword( EpwingHeadword & head ) @@ -881,13 +881,25 @@ bool EpwingBook::getNextHeadword( EpwingHeadword & head ) if( allHeadwordPositions.contains( head.headword ) ) { - EWPos epos = allHeadwordPositions[ head.headword ]; - if( pos.page != epos.first || abs( pos.offset - epos.second ) > 4 ) + // existed position + bool existed = false; + foreach( EWPos epos, allHeadwordPositions[ head.headword ] ) + { + if( pos.page == epos.first && abs( pos.offset - epos.second ) <= 4 ) + { + existed = true; + break; + } + } + if( !existed ) + { + allHeadwordPositions[ head.headword ] << EWPos( pos.page, pos.offset ); return true; + } } else { - allHeadwordPositions[ head.headword ] = EWPos( pos.page, pos.offset ); + allHeadwordPositions[ head.headword ]< 4 ) - break; + // existed position + bool existed = false; + foreach( EWPos epos, allHeadwordPositions[ head.headword ] ) + { + if( pos.page == epos.first && abs( pos.offset - epos.second ) <= 4 ) + { + existed = true; + break; + } + } + if( !existed ) + { + allHeadwordPositions[ head.headword ] << EWPos( pos.page, pos.offset ); + return true; + } } else { - allHeadwordPositions[ head.headword ] = EWPos( pos.page, pos.offset ); - break; + allHeadwordPositions[ head.headword ]< baseFontsMap, customFontsMap; QVector< int > refPages, refOffsets; - QMap< QString, EWPos > allHeadwordPositions; + QMap< QString, QList > allHeadwordPositions; QVector< EWPos > LinksQueue; int refOpenCount, refCloseCount; static Mutex libMutex; From b41bef9a314c73458eaded8ae3517acf9e7fa1eb Mon Sep 17 00:00:00 2001 From: Xiao YiFang Date: Mon, 23 May 2022 00:26:12 +0800 Subject: [PATCH 2/2] fix: skip too long headword in epwing dictionary --- epwing.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/epwing.cc b/epwing.cc index cd5eec5c..9d67cd2d 100644 --- a/epwing.cc +++ b/epwing.cc @@ -1054,7 +1054,8 @@ vector< sptr< Dictionary::Class > > makeDictionaries( for( ; ; ) { - if( !head.headword.isEmpty() ) + //skip too long headword + if( !head.headword.isEmpty() && head.headword.size() < 30 ) { uint32_t offset = chunks.startNewBlock(); chunks.addToBlock( &head.page, sizeof( head.page ) );