mirror of
https://github.com/xiaoyifang/goldendict-ng.git
synced 2024-11-27 15:24:05 +00:00
feature: epwing dictionary reference navigation improved.
feature: add branch method to handle the reference navigation loadArticle function feature: refactor epwing loadArticle method epwing: add previous and next page link
This commit is contained in:
parent
f7d6328f40
commit
c4674a246f
|
@ -66,3 +66,6 @@ QRegularExpression Mdx::styleElment( R"((<style[^>]*>)([\w\W]*?)(<\/style>))",
|
|||
|
||||
|
||||
QRegularExpression Zim::linkSpecialChar("[\\.\\/]");
|
||||
|
||||
|
||||
// Matches the EPWING navigation pseudo-headwords: a leading 'r' or 'p',
// the page number, the literal "at", then the offset (case-insensitive).
// Capture 1 is the page, capture 2 is the offset.
// The leading letter is a character class, so it must not contain '|':
// inside [...] a pipe is a literal character, not an alternation.
QRegularExpression Epwing::refWord(R"([rp](\d+)at(\d+))", QRegularExpression::CaseInsensitiveOption);
|
||||
|
|
|
@ -50,6 +50,11 @@ class Zim{
|
|||
static QRegularExpression linkSpecialChar;
|
||||
};
|
||||
|
||||
class Epwing{
|
||||
public:
|
||||
static QRegularExpression refWord;
|
||||
};
|
||||
|
||||
} // namespace RX
|
||||
|
||||
#endif // GLOBALREGEX_HH
|
||||
|
|
110
epwing.cc
110
epwing.cc
|
@ -13,6 +13,7 @@
|
|||
#include <QtConcurrent>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <QObject>
|
||||
|
||||
#include "btreeidx.hh"
|
||||
#include "folding.hh"
|
||||
|
@ -24,6 +25,7 @@
|
|||
#include "utf8.hh"
|
||||
#include "filetype.hh"
|
||||
#include "ftshelpers.hh"
|
||||
#include "base/globalregex.hh"
|
||||
|
||||
namespace Epwing {
|
||||
|
||||
|
@ -175,6 +177,9 @@ private:
|
|||
int & articlePage,
|
||||
int & articleOffset );
|
||||
|
||||
void loadArticleNextPage( string & articleHeadword, string & articleText, int & articlePage, int & articleOffset );
|
||||
void loadArticlePreviousPage( string & articleHeadword, string & articleText, int & articlePage, int & articleOffset );
|
||||
|
||||
void loadArticle( int articlePage, int articleOffset, string & articleHeadword,
|
||||
string & articleText );
|
||||
|
||||
|
@ -194,6 +199,8 @@ private:
|
|||
friend class EpwingArticleRequest;
|
||||
friend class EpwingResourceRequest;
|
||||
friend class EpwingWordSearchRequest;
|
||||
string epwing_previous_button(int& articleOffset, int& articlePage);
|
||||
string epwing_next_button(int& articleOffset, int& articlePage);
|
||||
};
|
||||
|
||||
|
||||
|
@ -323,6 +330,77 @@ void EpwingDictionary::loadArticle(
|
|||
articleText = prefix + articleText + "</div>";
|
||||
}
|
||||
|
||||
/// Builds the HTML paragraph holding the "Previous Page" link.
///
/// The link target is gdlookup://localhost/p<page>At<offset>; the visible
/// label is the translated "Previous Page" string.
string Epwing::EpwingDictionary::epwing_previous_button(int& articlePage, int& articleOffset)
{
  const string target = QString( "p%1At%2" ).arg( articlePage ).arg( articleOffset ).toStdString();
  const string label  = QObject::tr( "Previous Page" ).toStdString();

  string html( "<p><a class=\"epwing_previous_page\" href=\"gdlookup://localhost/" );
  html += target;
  html += "\">";
  html += label;
  html += "</a></p>";

  return html;
}
|
||||
|
||||
/// Loads one chunk of article text starting at the given position and wraps
/// it with "Previous Page" / "Next Page" navigation links.
///
/// @param articleHeadword  receives the headword read at the position (UTF-8)
/// @param articleText      receives the HTML: an epwing_text div containing
///                         the previous link, the text and the next link
/// @param articlePage      page to start reading from
/// @param articleOffset    offset inside that page
///
/// Failures derived from std::exception are not propagated; their message is
/// placed into the article text instead.
void EpwingDictionary::loadArticleNextPage(string & articleHeadword, string & articleText, int & articlePage, int & articleOffset )
{
  QString headword, text;

  // Start from the requested position: when the read below throws, the
  // "Next Page" link must not be built from an uninitialized EB_Position.
  EB_Position pos;
  pos.page   = articlePage;
  pos.offset = articleOffset;

  try
  {
    Mutex::Lock _( eBook.getLibMutex() );
    pos = eBook.getArticleNextPage( headword, text, articlePage, articleOffset, false );
  }
  catch( std::exception & e )
  {
    text = QString( "Article reading error: %1")
             .arg( QString::fromUtf8( e.what() ) );
  }

  articleHeadword = string( headword.toUtf8().data() );
  articleText = string( text.toUtf8().data() );

  string prefix( "<div class=\"epwing_text\">" );
  // The previous link points at the position this chunk started from.
  string previousLink = epwing_previous_button(articlePage, articleOffset);

  articleText = prefix + previousLink + articleText;
  // The next link points at the position returned by the book reader.
  string nextLink = epwing_next_button(pos.page, pos.offset);
  articleText = articleText + nextLink;
  articleText = articleText + "</div>";
}
|
||||
|
||||
/// Builds the HTML paragraph holding the "Next Page" link.
///
/// The link target is gdlookup://localhost/r<page>At<offset>; the visible
/// label is the translated "Next Page" string.
string Epwing::EpwingDictionary::epwing_next_button(int& articlePage, int& articleOffset )
{
  const string target = QString( "r%1At%2" ).arg( articlePage ).arg( articleOffset ).toStdString();
  const string label  = QObject::tr( "Next Page" ).toStdString();

  string html( "<p><a class=\"epwing_next_page\" href=\"gdlookup://localhost/" );
  html += target;
  html += "\">";
  html += label;
  html += "</a></p>";

  return html;
}
|
||||
|
||||
/// Loads the chunk of article text that precedes the given position and wraps
/// it with "Previous Page" / "Next Page" navigation links.
///
/// @param articleHeadword  receives the headword read at the position (UTF-8)
/// @param articleText      receives the HTML: an epwing_text div containing
///                         the previous link, the text and the next link
/// @param articlePage      page the backwards read starts from
/// @param articleOffset    offset inside that page
///
/// Failures derived from std::exception are not propagated; their message is
/// placed into the article text instead.
void EpwingDictionary::loadArticlePreviousPage(
  string & articleHeadword, string & articleText, int & articlePage, int & articleOffset )
{
  QString headword, text;

  // Start from the requested position: when the read below throws, the
  // "Previous Page" link must not be built from an uninitialized EB_Position.
  EB_Position pos;
  pos.page   = articlePage;
  pos.offset = articleOffset;

  try
  {
    Mutex::Lock _( eBook.getLibMutex() );
    pos = eBook.getArticlePreviousPage( headword, text, articlePage, articleOffset, false );
  }
  catch( std::exception & e )
  {
    text = QString( "Article reading error: %1" ).arg( QString::fromUtf8( e.what() ) );
  }

  articleHeadword = string( headword.toUtf8().data() );
  articleText = string( text.toUtf8().data() );

  string prefix( "<div class=\"epwing_text\">" );

  // The previous link points at the position returned by the book reader.
  string previousLink = epwing_previous_button(pos.page, pos.offset );
  articleText = prefix + previousLink + articleText;
  // The next link points back at the position this request started from.
  string nextLink = epwing_next_button( articlePage, articleOffset );
  articleText = articleText + nextLink;
  articleText = articleText + "</div>";
}
|
||||
|
||||
void EpwingDictionary::loadArticle( int articlePage,
|
||||
int articleOffset,
|
||||
string & articleHeadword,
|
||||
|
@ -521,11 +599,7 @@ void EpwingArticleRequest::run()
|
|||
|
||||
try
|
||||
{
|
||||
dict.loadArticle( chain[ x ].articleOffset,
|
||||
headword,
|
||||
articleText,
|
||||
articlePage,
|
||||
articleOffset );
|
||||
dict.loadArticle( chain[ x ].articleOffset, headword, articleText, articlePage, articleOffset );
|
||||
}
|
||||
catch(...)
|
||||
{
|
||||
|
@ -562,7 +636,11 @@ void EpwingArticleRequest::run()
|
|||
getBuiltInArticle( alts[ x ], pages, offsets, alternateArticles );
|
||||
}
|
||||
|
||||
if ( mainArticles.empty() && alternateArticles.empty() )
|
||||
|
||||
QRegularExpressionMatch m = RX::Epwing::refWord.match( gd::toQString( word ) );
|
||||
bool ref = m.hasMatch();
|
||||
|
||||
if ( mainArticles.empty() && alternateArticles.empty() && !ref)
|
||||
{
|
||||
// No such word
|
||||
finish();
|
||||
|
@ -589,6 +667,26 @@ void EpwingArticleRequest::run()
|
|||
result += i->second.second;
|
||||
}
|
||||
|
||||
{
|
||||
QRegularExpressionMatch m = RX::Epwing::refWord.match( gd::toQString( word ) );
|
||||
if( m.hasMatch() )
|
||||
{
|
||||
string headword, articleText;
|
||||
int articlePage = m.captured( 1 ).toInt();
|
||||
int articleOffset = m.captured( 2 ).toInt();
|
||||
if( word[ 0 ] =='r' )
|
||||
dict.loadArticleNextPage( headword, articleText, articlePage, articleOffset );
|
||||
else
|
||||
{
|
||||
//starts with p
|
||||
dict.loadArticlePreviousPage( headword, articleText, articlePage, articleOffset );
|
||||
}
|
||||
|
||||
result += articleText;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
result += "</div>";
|
||||
|
||||
Mutex::Lock _( dataMutex );
|
||||
|
|
209
epwing_book.cc
209
epwing_book.cc
|
@ -14,7 +14,7 @@
|
|||
#include "wstring_qt.hh"
|
||||
#include "folding.hh"
|
||||
#include "epwing_charmap.hh"
|
||||
|
||||
#include "htmlescape.hh"
|
||||
#if defined( Q_OS_WIN32 ) || defined( Q_OS_MAC )
|
||||
#define _FILE_OFFSET_BITS 64
|
||||
#endif
|
||||
|
@ -687,18 +687,7 @@ QString EpwingBook::getText( int page, int offset, bool text_only )
|
|||
{
|
||||
error_string.clear();
|
||||
|
||||
EB_Position pos;
|
||||
pos.page = page;
|
||||
pos.offset = offset;
|
||||
currentPosition = pos;
|
||||
|
||||
EB_Error_Code ret = eb_seek_text(&book, &pos);
|
||||
if( ret != EB_SUCCESS )
|
||||
{
|
||||
setErrorString( "eb_seek_text", ret );
|
||||
currentPosition.page = 0;
|
||||
throw exEbLibrary( error_string.toUtf8().data() );
|
||||
}
|
||||
seekBookThrow( page, offset );
|
||||
|
||||
QByteArray buf;
|
||||
char buffer[ TextBufferSize + 1 ];
|
||||
|
@ -710,7 +699,7 @@ QString EpwingBook::getText( int page, int offset, bool text_only )
|
|||
|
||||
for( ; ; )
|
||||
{
|
||||
ret = eb_read_text( &book, &appendix, &hookSet, &container,
|
||||
EB_Error_Code ret = eb_read_text( &book, &appendix, &hookSet, &container,
|
||||
TextBufferSize, buffer, &buffer_length );
|
||||
|
||||
if( ret != EB_SUCCESS )
|
||||
|
@ -737,6 +726,129 @@ QString EpwingBook::getText( int page, int offset, bool text_only )
|
|||
return text;
|
||||
}
|
||||
|
||||
void EpwingBook::seekBookThrow( int page, int offset )
|
||||
{
|
||||
EB_Position pos;
|
||||
pos.page = page;
|
||||
pos.offset = offset;
|
||||
currentPosition = pos;
|
||||
|
||||
EB_Error_Code ret = eb_seek_text( &book, &pos );
|
||||
if( ret != EB_SUCCESS )
|
||||
{
|
||||
setErrorString( "eb_seek_text", ret );
|
||||
currentPosition.page = 0;
|
||||
throw exEbLibrary( error_string.toUtf8().data() );
|
||||
}
|
||||
}
|
||||
|
||||
/// Reads text forward from (page, offset), chunk by chunk, until more than
/// `total` bytes have been collected, a read yields no data, or a read fails.
///
/// @param page    page to start reading from
/// @param offset  offset inside that page
/// @param total   byte count after which reading stops
/// @param pos     receives the position reported by eb_tell_text once reading
///                has finished; it holds the start position if no later
///                position could be stored
/// @return the collected text, trimmed and passed through finalizeText();
///         an empty QString if the collected data exceeds TextSizeLimit
/// @throws exEbLibrary if seeking to the start or advancing the text fails
QString EpwingBook::getTextWithLength( int page, int offset, int total, EB_Position & pos )
{
  error_string.clear();
  int currentLength = 0;

  // Give the out-parameter a defined value up front: the "Data too large"
  // return and a failing eb_tell_text would otherwise leave it untouched.
  pos.page   = page;
  pos.offset = offset;

  seekBookThrow( page, offset );

  QByteArray buf;
  char buffer[ TextBufferSize + 1 ];
  ssize_t buffer_length;
  EContainer container( this, false );

  prepareToRead();

  for( ;; )
  {
    EB_Error_Code ret = eb_read_text( &book, &appendix, &hookSet, &container, TextBufferSize, buffer, &buffer_length );

    if( ret != EB_SUCCESS )
    {
      // A failed read ends the loop; whatever was collected so far is returned.
      setErrorString( "eb_read_text", ret );
      break;
    }

    buf += QByteArray( buffer, buffer_length );
    currentLength += buffer_length;

    if( currentLength > total || buffer_length == 0 )
      break;

    if( buf.length() > TextSizeLimit )
    {
      error_string = "Data too large";
      currentPosition.page = 0;
      return QString();
    }

    ret = eb_forward_text( &book, &appendix );
    if( ret != EB_SUCCESS )
    {
      // Report the call that actually failed (was mislabelled "eb_seek_text").
      setErrorString( "eb_forward_text", ret );
      currentPosition.page = 0;
      throw exEbLibrary( error_string.toUtf8().data() );
    }
  }

  eb_tell_text( &book, &pos );
  QString text = QString::fromUtf8( buf.data(), buf.size() ).trimmed();
  finalizeText( text );
  return text;
}
|
||||
|
||||
/// Reads text backwards from (page, offset): each round seeks to the current
/// position, steps back with eb_backward_text, reads there and prepends the
/// result, until more than `total` bytes have been collected, a read yields
/// no data, or a read fails.
///
/// @param pos  receives the position reported by eb_tell_text after the most
///             recent backward step
/// @return the collected text, trimmed and passed through finalizeText();
///         an empty QString if the collected data exceeds TextSizeLimit
/// @throws exEbLibrary if seeking or stepping backwards fails
QString EpwingBook::getPreviousTextWithLength( int page, int offset, int total, EB_Position & pos )
{
  error_string.clear();

  int collected = 0;
  QByteArray accumulated;
  char chunk[ TextBufferSize + 1 ];
  ssize_t chunkLength;
  EContainer container( this, false );

  prepareToRead();

  while( true )
  {
    seekBookThrow( page, offset );

    EB_Error_Code status = eb_backward_text( &book, &appendix );
    if( status != EB_SUCCESS )
    {
      setErrorString( "eb_backward_text", status );
      currentPosition.page = 0;
      throw exEbLibrary( error_string.toUtf8().data() );
    }

    // The next round continues from wherever the backward step landed.
    eb_tell_text( &book, &pos );
    page   = pos.page;
    offset = pos.offset;

    status = eb_read_text( &book, &appendix, &hookSet, &container, TextBufferSize, chunk, &chunkLength );
    if( status != EB_SUCCESS )
    {
      setErrorString( "eb_read_text", status );
      break;
    }

    accumulated.prepend( QByteArray( chunk, chunkLength ) );
    collected += chunkLength;

    const bool enough = collected > total;
    const bool nothingRead = chunkLength == 0;
    if( enough || nothingRead )
      break;

    if( accumulated.length() > TextSizeLimit )
    {
      error_string = "Data too large";
      currentPosition.page = 0;
      return QString();
    }
  }

  QString text = QString::fromUtf8( accumulated.data(), accumulated.size() ).trimmed();
  finalizeText( text );
  return text;
}
|
||||
|
||||
|
||||
|
||||
void EpwingBook::getReferencesFromText( int page, int offset )
|
||||
{
|
||||
error_string.clear();
|
||||
|
@ -934,6 +1046,7 @@ bool EpwingBook::processRef( EpwingHeadword & head)
|
|||
pos.page = epos.first;
|
||||
pos.offset = epos.second;
|
||||
|
||||
// epwing ebook use ref link to navigate , the headword(at such position) usually has no meaningful point.
|
||||
if( readHeadword( pos, head.headword, true ) )
|
||||
{
|
||||
if( head.headword.isEmpty() || head.headword.contains( badLinks ) )
|
||||
|
@ -944,13 +1057,16 @@ bool EpwingBook::processRef( EpwingHeadword & head)
|
|||
head.page = pos.page;
|
||||
head.offset = pos.offset;
|
||||
auto key = ( (uint64_t)pos.page ) << 32 | ( pos.offset );
|
||||
if( !allRefPositions.contains( key ) )
|
||||
//this only add the existed reference point which has already in the headwords as another headword(rxxxxAtxxxx) in the headword list.
|
||||
//this will make the loadarticle's real reference link judgement easier.
|
||||
|
||||
if( allRefPositions.contains( key ) )
|
||||
{
|
||||
// fixed the reference headword ,to avoid the headword collision with other entry .
|
||||
//if(!allHeadwordPositions.contains(key))
|
||||
head.headword = QString( "r%1At%2" ).arg( pos.page ).arg( pos.offset );
|
||||
|
||||
allRefPositions[ key ] = true;
|
||||
//allRefPositions[ key ] = true;
|
||||
|
||||
try
|
||||
{
|
||||
|
@ -1080,6 +1196,10 @@ void EpwingBook::fixHeadword( QString & headword )
|
|||
//if( isHeadwordCorrect( fixed ) )
|
||||
// headword = fixed;
|
||||
|
||||
//remove leading number and space.
|
||||
QRegularExpression leadingNumAndSpace( R"(^[\d\s]+\b)" );
|
||||
fixed.remove( leadingNumAndSpace );
|
||||
|
||||
headword = fixed;
|
||||
}
|
||||
|
||||
|
@ -1087,28 +1207,30 @@ void EpwingBook::getArticle( QString & headword, QString & articleText,
|
|||
int page, int offset, bool text_only)
|
||||
{
|
||||
error_string.clear();
|
||||
char buffer[ TextBufferSize + 1 ];
|
||||
|
||||
EB_Position pos;
|
||||
pos.page = page;
|
||||
pos.offset = offset;
|
||||
seekBookThrow( page, offset );
|
||||
|
||||
currentPosition = pos;
|
||||
readHeadword( headword, text_only );
|
||||
|
||||
EB_Error_Code ret = eb_seek_text( &book, &pos );
|
||||
if( ret != EB_SUCCESS )
|
||||
{
|
||||
setErrorString( "eb_seek_text", ret );
|
||||
throw exEbLibrary( error_string.toUtf8().data() );
|
||||
}
|
||||
QString hw = Html::unescape( headword, true );
|
||||
fixHeadword( hw );
|
||||
|
||||
auto parts = hw.split( QChar::Space, Qt::SkipEmptyParts );
|
||||
articleText = getText( page, offset, text_only );
|
||||
|
||||
}
|
||||
|
||||
void EpwingBook::readHeadword(QString & headword, bool text_only)
|
||||
{
|
||||
EContainer container( this, text_only );
|
||||
ssize_t length;
|
||||
|
||||
prepareToRead();
|
||||
|
||||
ret = eb_read_heading( &book, &appendix, &hookSet, &container,
|
||||
TextBufferSize, buffer, &length );
|
||||
char buffer[ TextBufferSize + 1 ];
|
||||
|
||||
EB_Error_Code ret = eb_read_heading( &book, &appendix, &hookSet, &container,
|
||||
TextBufferSize, buffer, &length );
|
||||
if( ret != EB_SUCCESS )
|
||||
{
|
||||
setErrorString( "eb_read_heading", ret );
|
||||
|
@ -1120,8 +1242,35 @@ void EpwingBook::getArticle( QString & headword, QString & articleText,
|
|||
|
||||
if( text_only )
|
||||
fixHeadword( headword );
|
||||
}
|
||||
|
||||
articleText = getText( pos.page, pos.offset, text_only);
|
||||
/// Reads the headword at (page, offset) and then a chunk of text going
/// forward from that position (length threshold 4000, see getTextWithLength).
///
/// @param headword     receives the headword read at the position
/// @param articleText  receives the text returned by getTextWithLength()
/// @param text_only    forwarded to readHeadword()
/// @return the position filled in by getTextWithLength(); the start position
///         if that call returned without storing one
EB_Position EpwingBook::getArticleNextPage(
  QString & headword, QString & articleText,
  int page, int offset, bool text_only)
{
  error_string.clear();

  seekBookThrow( page, offset );

  readHeadword( headword, text_only );

  // Never return an indeterminate position: default to where we started.
  EB_Position pos;
  pos.page   = page;
  pos.offset = offset;

  articleText = getTextWithLength( page, offset, 4000, pos);
  return pos;
}
|
||||
|
||||
/// Reads the headword at (page, offset) and then a chunk of text going
/// backwards from that position (length threshold 4000, see
/// getPreviousTextWithLength).
///
/// @param headword     receives the headword read at the position
/// @param articleText  receives the text returned by getPreviousTextWithLength()
/// @param text_only    forwarded to readHeadword()
/// @return the position filled in by getPreviousTextWithLength(); the start
///         position if that call did not store one
EB_Position EpwingBook::getArticlePreviousPage(
  QString & headword, QString & articleText, int page, int offset, bool text_only )
{
  error_string.clear();

  seekBookThrow( page, offset );

  readHeadword( headword, text_only );

  // Never return an indeterminate position: default to where we started.
  EB_Position pos;
  pos.page   = page;
  pos.offset = offset;

  articleText = getPreviousTextWithLength( page, offset, 4000, pos );
  return pos;
}
|
||||
|
||||
const char * EpwingBook::beginDecoration( unsigned int code )
|
||||
|
|
|
@ -100,6 +100,9 @@ class EpwingBook
|
|||
|
||||
// Retrieve article text from dictionary
|
||||
QString getText( int page, int offset, bool text_only);
|
||||
void seekBookThrow( int page, int offset );
|
||||
QString getTextWithLength( int page, int offset, int total, EB_Position & pos );
|
||||
QString getPreviousTextWithLength( int page, int offset, int total, EB_Position & pos );
|
||||
|
||||
unsigned int normalizeDecorationCode( unsigned int code );
|
||||
|
||||
|
@ -196,7 +199,11 @@ public:
|
|||
// Retrieve article from dictionary
|
||||
void getArticle( QString & headword, QString & articleText,
|
||||
int page, int offset, bool text_only );
|
||||
void readHeadword( QString & headword, bool text_only);
|
||||
|
||||
EB_Position getArticleNextPage( QString & headword, QString & articleText,
|
||||
int page, int offset, bool text_only );
|
||||
EB_Position getArticlePreviousPage( QString & headword, QString & articleText, int page, int offset, bool text_only );
|
||||
const char * beginDecoration( unsigned int code );
|
||||
const char * endDecoration( unsigned int code );
|
||||
|
||||
|
|
Loading…
Reference in a new issue