mirror of
https://github.com/xiaoyifang/goldendict-ng.git
synced 2024-11-27 19:24:08 +00:00
fix: skip invalid headword in some epwing dictionaries (#1096)
* fix: for some epwing dictionaries, skip invalid headword fix #1095 * [autofix.ci] apply automated fixes * fix: epwing detect next text block * [autofix.ci] apply automated fixes * fix: epwing getFirstHeadword does not need forwardText * fix: epwing, if epwing subbook does not contain headword, use menu instead * [autofix.ci] apply automated fixes * fix: if epwing subbook does not contain headword, use menu instead * [autofix.ci] apply automated fixes * fix: code smells --------- Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
This commit is contained in:
parent
18c25b36ee
commit
2dd04207d1
|
@ -1220,16 +1220,26 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
ChunkedStorage::Writer chunks( idx );
|
||||
|
||||
Epwing::Book::EpwingHeadword head;
|
||||
|
||||
dict.getFirstHeadword( head );
|
||||
|
||||
int wordCount = 0;
|
||||
int articleCount = 0;
|
||||
|
||||
for ( ;; ) {
|
||||
addWordToChunks( head, chunks, indexedWords, wordCount, articleCount );
|
||||
if ( !dict.getNextHeadword( head ) )
|
||||
break;
|
||||
if ( dict.getFirstHeadword( head ) ) {
|
||||
for ( ;; ) {
|
||||
addWordToChunks( head, chunks, indexedWords, wordCount, articleCount );
|
||||
if ( !dict.getNextHeadword( head ) )
|
||||
break;
|
||||
}
|
||||
}
|
||||
else {
|
||||
//the book does not contain text,use menu instead if any.
|
||||
if ( dict.getMenu( head ) ) {
|
||||
auto candidateItems = dict.candidate( head.page, head.offset );
|
||||
for ( Epwing::Book::EpwingHeadword word : candidateItems ) {
|
||||
addWordToChunks( word, chunks, indexedWords, wordCount, articleCount );
|
||||
}
|
||||
}
|
||||
else {
|
||||
throw exEbLibrary( dict.errorString().toUtf8().data() );
|
||||
}
|
||||
}
|
||||
|
||||
dict.clearBuffers();
|
||||
|
|
|
@ -61,6 +61,7 @@ HookFunc( hook_mpeg );
|
|||
HookFunc( hook_narrow_font );
|
||||
HookFunc( hook_wide_font );
|
||||
HookFunc( hook_reference );
|
||||
HookFunc( hook_candidate );
|
||||
|
||||
const EB_Hook hooks[] = { { EB_HOOK_NEWLINE, hook_newline },
|
||||
{ EB_HOOK_ISO8859_1, hook_iso8859_1 },
|
||||
|
@ -89,6 +90,7 @@ const EB_Hook hooks[] = { { EB_HOOK_NEWLINE, hook_newline },
|
|||
{ EB_HOOK_WIDE_FONT, hook_wide_font },
|
||||
{ EB_HOOK_BEGIN_REFERENCE, hook_reference },
|
||||
{ EB_HOOK_END_REFERENCE, hook_reference },
|
||||
{ EB_HOOK_END_CANDIDATE_GROUP, hook_candidate },
|
||||
{ EB_HOOK_NULL, NULL } };
|
||||
|
||||
const EB_Hook refHooks[] = {
|
||||
|
@ -377,6 +379,21 @@ hook_reference( EB_Book * book, EB_Appendix *, void * container, EB_Hook_Code co
|
|||
return EB_SUCCESS;
|
||||
}
|
||||
|
||||
/// EB text hook registered for EB_HOOK_END_CANDIDATE_GROUP.
///
/// Forwards the hook arguments to EpwingBook::handleCandidate() and writes
/// whatever bytes it returns into the EB text stream. Nothing is done when the
/// container is in text-only mode.
///
/// @param book       EB book the text is being read from.
/// @param container  Opaque pointer; cast to EContainer.
/// @param code       Hook code that triggered this call.
/// @param argv       Hook arguments, passed through unchanged.
/// @return Always EB_SUCCESS.
EB_Error_Code
hook_candidate( EB_Book * book, EB_Appendix *, void * container, EB_Hook_Code code, int, const unsigned int * argv )
{
  auto * ctx = static_cast< EContainer * >( container );

  if ( !ctx->textOnly ) {
    const QByteArray output = ctx->book->handleCandidate( code, argv );
    if ( !output.isEmpty() ) {
      eb_write_text( book, output.data(), output.size() );
    }
  }

  return EB_SUCCESS;
}
|
||||
|
||||
// EpwingBook class
|
||||
|
||||
EpwingBook::EpwingBook():
|
||||
|
@ -564,7 +581,7 @@ QString EpwingBook::createCacheDir( QString const & dirName )
|
|||
if ( !info.exists() || !info.isDir() ) {
|
||||
if ( !dir.mkdir( mainCacheDir ) ) {
|
||||
gdWarning( "Epwing: can't create cache directory \"%s\"", mainCacheDir.toUtf8().data() );
|
||||
return QString();
|
||||
return {};
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -573,7 +590,7 @@ QString EpwingBook::createCacheDir( QString const & dirName )
|
|||
if ( !info.exists() || !info.isDir() ) {
|
||||
if ( !dir.mkdir( cacheDir ) ) {
|
||||
gdWarning( "Epwing: can't create cache directory \"%s\"", cacheDir.toUtf8().data() );
|
||||
return QString();
|
||||
return {};
|
||||
}
|
||||
}
|
||||
return cacheDir;
|
||||
|
@ -649,7 +666,7 @@ QString EpwingBook::title()
|
|||
if ( codec_Euc )
|
||||
return codec_Euc->toUnicode( buf );
|
||||
|
||||
return QString();
|
||||
return {};
|
||||
}
|
||||
|
||||
QString EpwingBook::copyright()
|
||||
|
@ -657,7 +674,7 @@ QString EpwingBook::copyright()
|
|||
error_string.clear();
|
||||
|
||||
if ( !eb_have_copyright( &book ) )
|
||||
return QString();
|
||||
return {};
|
||||
|
||||
EB_Position position;
|
||||
EB_Error_Code ret = eb_copyright( &book, &position );
|
||||
|
@ -671,9 +688,18 @@ QString EpwingBook::copyright()
|
|||
return getText( position.page, position.offset, true );
|
||||
}
|
||||
|
||||
/// Collect the candidate entries found in the text at the given position.
///
/// The candidate list is emptied, the text at (page, offset) is read with
/// text_only = false, and a copy of the list as it stands afterwards is
/// returned. The text returned by getText() is discarded; the call is made
/// for its effect on candidateItems.
///
/// @param page    Page of the text to scan.
/// @param offset  Offset inside that page.
/// @return Copy of candidateItems after the text has been read.
QList< EpwingHeadword > EpwingBook::candidate( int page, int offset )
{
  // Start from an empty list so entries from an earlier read never leak in.
  candidateItems.clear();

  // Only the side effect on candidateItems matters; the text is not used.
  getText( page, offset, false );

  return candidateItems;
}
|
||||
|
||||
QString EpwingBook::getText( int page, int offset, bool text_only )
|
||||
{
|
||||
error_string.clear();
|
||||
candidateItems.clear();
|
||||
|
||||
seekBookThrow( page, offset );
|
||||
|
||||
|
@ -701,7 +727,7 @@ QString EpwingBook::getText( int page, int offset, bool text_only )
|
|||
if ( buf.length() > TextSizeLimit ) {
|
||||
error_string = "Data too large";
|
||||
currentPosition.page = 0;
|
||||
return QString();
|
||||
return {};
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -822,7 +848,6 @@ QString EpwingBook::getPreviousTextWithLength( int page, int offset, int total,
|
|||
return text;
|
||||
}
|
||||
|
||||
|
||||
void EpwingBook::getReferencesFromText( int page, int offset )
|
||||
{
|
||||
error_string.clear();
|
||||
|
@ -872,15 +897,14 @@ EB_Error_Code EpwingBook::forwardText( EB_Position & startPos )
|
|||
}
|
||||
|
||||
ret = eb_forward_text( &book, &appendix );
|
||||
while ( ret == EB_ERR_END_OF_CONTENT ) {
|
||||
ret = eb_tell_text( &book, &startPos );
|
||||
if ( ret != EB_SUCCESS )
|
||||
break;
|
||||
while ( ret != EB_SUCCESS ) {
|
||||
|
||||
if ( startPos.page >= book.subbook_current->text.end_page )
|
||||
return EB_ERR_END_OF_CONTENT;
|
||||
|
||||
startPos.offset += 2;
|
||||
const auto offset = startPos.offset + 2;
|
||||
startPos.offset = offset % EB_SIZE_PAGE;
|
||||
startPos.page += offset / EB_SIZE_PAGE;
|
||||
currentPosition = startPos;
|
||||
|
||||
ret = eb_seek_text( &book, &startPos );
|
||||
|
@ -891,7 +915,7 @@ EB_Error_Code EpwingBook::forwardText( EB_Position & startPos )
|
|||
return ret;
|
||||
}
|
||||
|
||||
void EpwingBook::getFirstHeadword( EpwingHeadword & head )
|
||||
bool EpwingBook::getFirstHeadword( EpwingHeadword & head )
|
||||
{
|
||||
error_string.clear();
|
||||
|
||||
|
@ -900,13 +924,15 @@ void EpwingBook::getFirstHeadword( EpwingHeadword & head )
|
|||
EB_Error_Code ret = eb_text( &book, &pos );
|
||||
if ( ret != EB_SUCCESS ) {
|
||||
setErrorString( "eb_text", ret );
|
||||
throw exEbLibrary( error_string.toUtf8().data() );
|
||||
qWarning() << error_string;
|
||||
return false;
|
||||
}
|
||||
|
||||
ret = forwardText( pos );
|
||||
if ( ret != EB_SUCCESS ) {
|
||||
setErrorString( "forwardText", ret );
|
||||
throw exEbLibrary( error_string.toUtf8().data() );
|
||||
setErrorString( "getFirstHeadword", ret );
|
||||
qWarning() << error_string;
|
||||
return false;
|
||||
}
|
||||
|
||||
eb_backward_text( &book, &appendix );
|
||||
|
@ -914,7 +940,49 @@ void EpwingBook::getFirstHeadword( EpwingHeadword & head )
|
|||
ret = eb_tell_text( &book, &pos );
|
||||
if ( ret != EB_SUCCESS ) {
|
||||
setErrorString( "eb_tell_text", ret );
|
||||
throw exEbLibrary( error_string.toUtf8().data() );
|
||||
qWarning() << error_string;
|
||||
return false;
|
||||
}
|
||||
|
||||
currentPosition = pos;
|
||||
indexHeadwordsPosition = pos;
|
||||
|
||||
head.page = pos.page;
|
||||
head.offset = pos.offset;
|
||||
|
||||
if ( !readHeadword( pos, head.headword, true ) ) {
|
||||
qWarning() << error_string;
|
||||
return false;
|
||||
}
|
||||
|
||||
fixHeadword( head.headword );
|
||||
|
||||
allHeadwordPositions[ ( (uint64_t)pos.page ) << 32 | ( pos.offset ) ] = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool EpwingBook::haveMenu()
|
||||
{
|
||||
error_string.clear();
|
||||
|
||||
int ret = eb_have_menu( &book );
|
||||
return ret == 1;
|
||||
}
|
||||
|
||||
bool EpwingBook::getMenu( EpwingHeadword & head )
|
||||
{
|
||||
error_string.clear();
|
||||
|
||||
if ( !haveMenu() ) {
|
||||
return false;
|
||||
}
|
||||
|
||||
EB_Position pos;
|
||||
|
||||
EB_Error_Code ret = eb_menu( &book, &pos );
|
||||
if ( ret != EB_SUCCESS ) {
|
||||
setErrorString( "getMenu", ret );
|
||||
return false;
|
||||
}
|
||||
|
||||
currentPosition = pos;
|
||||
|
@ -924,19 +992,18 @@ void EpwingBook::getFirstHeadword( EpwingHeadword & head )
|
|||
head.offset = pos.offset;
|
||||
|
||||
if ( !readHeadword( pos, head.headword, true ) )
|
||||
throw exEbLibrary( error_string.toUtf8().data() );
|
||||
return false;
|
||||
|
||||
fixHeadword( head.headword );
|
||||
|
||||
EWPos epos( pos.page, pos.offset );
|
||||
allHeadwordPositions[ ( (uint64_t)pos.page ) << 32 | ( pos.offset ) ] = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool EpwingBook::getNextHeadword( EpwingHeadword & head )
|
||||
{
|
||||
EB_Position pos;
|
||||
|
||||
|
||||
// No queued positions - forward to next article
|
||||
|
||||
error_string.clear();
|
||||
|
@ -965,8 +1032,10 @@ bool EpwingBook::getNextHeadword( EpwingHeadword & head )
|
|||
head.page = pos.page;
|
||||
head.offset = pos.offset;
|
||||
|
||||
if ( !readHeadword( pos, head.headword, true ) )
|
||||
throw exEbLibrary( error_string.toUtf8().data() );
|
||||
if ( !readHeadword( pos, head.headword, true ) ) {
|
||||
qDebug() << "Epwing: ignore the following error=> " << error_string;
|
||||
continue;
|
||||
}
|
||||
|
||||
if ( head.headword.isEmpty() )
|
||||
continue;
|
||||
|
@ -1766,6 +1835,27 @@ QByteArray EpwingBook::handleReference( EB_Hook_Code code, const unsigned int *
|
|||
return str.toUtf8();
|
||||
}
|
||||
|
||||
/// Record one candidate entry reported by the EB candidate hook.
///
/// Builds an EpwingHeadword from the current candidate string and from
/// argv[1] / argv[2] (stored as page and offset respectively) and appends it
/// to candidateItems.
///
/// @param code  Hook code; not used here.
/// @param argv  Hook arguments; elements 1 and 2 are read.
/// @return Always an empty byte array, so the caller writes nothing.
QByteArray EpwingBook::handleCandidate( EB_Hook_Code code, const unsigned int * argv )
{
  EpwingHeadword item;
  item.page     = argv[ 1 ];
  item.offset   = argv[ 2 ];
  item.headword = currentCandidate();

  candidateItems.append( item );

  return {};
}
|
||||
|
||||
/// Return the string from eb_current_candidate() decoded to a QString.
///
/// When the book's character code is ISO 8859-1 the bytes are decoded as
/// Latin-1. Otherwise they are decoded with codec_Euc when that codec is
/// available.
///
/// @return The decoded candidate, or an empty QString when the book is not
///         ISO 8859-1 and codec_Euc is null.
QString EpwingBook::currentCandidate()
{
  const char * raw = eb_current_candidate( &book );

  if ( book.character_code == EB_CHARCODE_ISO8859_1 ) {
    return QString::fromLatin1( raw );
  }

  return codec_Euc ? codec_Euc->toUnicode( raw ) : QString{};
}
|
||||
|
||||
bool EpwingBook::getMatches( QString word, QVector< QString > & matches )
|
||||
{
|
||||
QByteArray bword, bword2;
|
||||
|
|
|
@ -85,6 +85,7 @@ class EpwingBook
|
|||
QVector< EWPos > LinksQueue;
|
||||
int refOpenCount, refCloseCount;
|
||||
static QMutex libMutex;
|
||||
QList< EpwingHeadword > candidateItems;
|
||||
|
||||
QString createCacheDir( QString const & dir );
|
||||
|
||||
|
@ -184,6 +185,8 @@ public:
|
|||
|
||||
// Make name for resource
|
||||
QString makeFName( QString const & ext, int page, int offset ) const;
|
||||
QByteArray handleCandidate( EB_Hook_Code code, const unsigned * argv );
|
||||
QString currentCandidate();
|
||||
|
||||
// Store all files in Epwing folder
|
||||
static void collectFilenames( QString const & directory, vector< string > & files );
|
||||
|
@ -199,10 +202,13 @@ public:
|
|||
QString getCurrentSubBookDirectory();
|
||||
|
||||
QString copyright();
|
||||
QList< EpwingHeadword > candidate( int page, int offset );
|
||||
QString title();
|
||||
|
||||
// Seek to first article
|
||||
void getFirstHeadword( EpwingHeadword & head );
|
||||
bool getFirstHeadword( EpwingHeadword & head );
|
||||
bool haveMenu();
|
||||
bool getMenu( EpwingHeadword & head );
|
||||
|
||||
// Find next headword and article position
|
||||
bool getNextHeadword( EpwingHeadword & head );
|
||||
|
|
Loading…
Reference in a new issue