feature: epwing dictionary reference navigation improved.

feature: add branch method to handle the reference navigation loadArticle function

feature: refactor epwing loadArticle method

epwing: add previous and next page link
This commit is contained in:
Xiao YiFang 2023-03-12 15:04:49 +08:00 committed by xiaoyifang
parent f7d6328f40
commit c4674a246f
5 changed files with 298 additions and 36 deletions

View file

@ -66,3 +66,6 @@ QRegularExpression Mdx::styleElment( R"((<style[^>]*>)([\w\W]*?)(<\/style>))",
QRegularExpression Zim::linkSpecialChar("[\\.\\/]");
// Matches the synthetic EPWING navigation headwords "r<page>At<offset>" and
// "p<page>At<offset>" (case-insensitively). Capture 1 is the page, capture 2 the offset.
// The prefix is the character class [rp]: inside a class '|' is a literal character,
// so the former "[r|p]" also accepted words such as "|12At34".
QRegularExpression Epwing::refWord( R"([rp](\d+)at(\d+))", QRegularExpression::CaseInsensitiveOption );

View file

@ -50,6 +50,11 @@ class Zim{
static QRegularExpression linkSpecialChar;
};
// Regular expressions used by the EPWING dictionary code.
class Epwing{
public:
// Pattern for the page-navigation headwords; the object itself is defined out of line.
static QRegularExpression refWord;
};
} // namespace RX
#endif // GLOBALREGEX_HH

110
epwing.cc
View file

@ -13,6 +13,7 @@
#include <QtConcurrent>
#include <set>
#include <string>
#include <QObject>
#include "btreeidx.hh"
#include "folding.hh"
@ -24,6 +25,7 @@
#include "utf8.hh"
#include "filetype.hh"
#include "ftshelpers.hh"
#include "base/globalregex.hh"
namespace Epwing {
@ -175,6 +177,9 @@ private:
int & articlePage,
int & articleOffset );
// Loads the text chunk that starts at (articlePage, articleOffset) and returns it in
// articleText wrapped in an "epwing_text" div, preceded by a "Previous Page" link and
// followed by a "Next Page" link. articleHeadword receives the headword read there.
void loadArticleNextPage( string & articleHeadword, string & articleText, int & articlePage, int & articleOffset );
// Loads the text chunk that precedes (articlePage, articleOffset) and returns it in
// articleText wrapped in an "epwing_text" div, preceded by a "Previous Page" link and
// followed by a "Next Page" link. articleHeadword receives the headword read at the
// requested position.
void loadArticlePreviousPage( string & articleHeadword, string & articleText, int & articlePage, int & articleOffset );
void loadArticle( int articlePage, int articleOffset, string & articleHeadword,
string & articleText );
@ -194,6 +199,8 @@ private:
friend class EpwingArticleRequest;
friend class EpwingResourceRequest;
friend class EpwingWordSearchRequest;
// Builds the HTML "Previous Page" link (gdlookup://localhost/p<page>At<offset>).
// Parameters are named in (page, offset) order, matching the definition and its callers.
string epwing_previous_button( int & articlePage, int & articleOffset );
// Builds the HTML "Next Page" link (gdlookup://localhost/r<page>At<offset>).
// Parameters are named in (page, offset) order, matching the definition and its callers.
string epwing_next_button( int & articlePage, int & articleOffset );
};
@ -323,6 +330,77 @@ void EpwingDictionary::loadArticle(
articleText = prefix + articleText + "</div>";
}
// Returns an HTML paragraph containing the "Previous Page" anchor. The anchor targets
// gdlookup://localhost/p<articlePage>At<articleOffset> and its caption is translated
// through QObject::tr.
string Epwing::EpwingDictionary::epwing_previous_button(int& articlePage, int& articleOffset)
{
  const string target = QString( "p%1At%2" ).arg( articlePage ).arg( articleOffset ).toStdString();
  const string caption = QObject::tr( "Previous Page" ).toStdString();

  string html( "<p><a class=\"epwing_previous_page\" href=\"gdlookup://localhost/" );
  html += target;
  html += "\">";
  html += caption;
  html += "</a></p>";
  return html;
}
// Loads the text chunk starting at (articlePage, articleOffset).
//
// articleHeadword receives the headword read at that position. articleText receives
// the chunk wrapped in <div class="epwing_text">, with a "Previous Page" link for the
// requested position in front and a "Next Page" link for the position reported by
// the book behind it. A read failure is reported inside articleText instead of being
// propagated to the caller.
void EpwingDictionary::loadArticleNextPage(string & articleHeadword, string & articleText, int & articlePage, int & articleOffset )
{
  QString headword, text;

  // Start from the requested position: if reading throws, getArticleNextPage() never
  // returns a position, and the "Next Page" link below must not be built from
  // uninitialized members.
  EB_Position pos;
  pos.page = articlePage;
  pos.offset = articleOffset;

  try
  {
    Mutex::Lock _( eBook.getLibMutex() );
    pos = eBook.getArticleNextPage( headword, text, articlePage, articleOffset, false );
  }
  catch( const std::exception & e )
  {
    text = QString( "Article reading error: %1")
             .arg( QString::fromUtf8( e.what() ) );
  }

  articleHeadword = string( headword.toUtf8().data() );
  articleText = string( text.toUtf8().data() );

  string prefix( "<div class=\"epwing_text\">" );
  string previousLink = epwing_previous_button(articlePage, articleOffset);
  articleText = prefix + previousLink + articleText;

  string nextLink = epwing_next_button(pos.page, pos.offset);
  articleText = articleText + nextLink;
  articleText = articleText + "</div>";
}
// Returns an HTML paragraph containing the "Next Page" anchor. The anchor targets
// gdlookup://localhost/r<articlePage>At<articleOffset> and its caption is translated
// through QObject::tr.
string Epwing::EpwingDictionary::epwing_next_button(int& articlePage, int& articleOffset )
{
  const string target = QString( "r%1At%2" ).arg( articlePage ).arg( articleOffset ).toStdString();
  const string caption = QObject::tr( "Next Page" ).toStdString();

  string html( "<p><a class=\"epwing_next_page\" href=\"gdlookup://localhost/" );
  html += target;
  html += "\">";
  html += caption;
  html += "</a></p>";
  return html;
}
// Loads the text chunk that precedes (articlePage, articleOffset).
//
// articleHeadword receives the headword read at the requested position. articleText
// receives the chunk wrapped in <div class="epwing_text">, with a "Previous Page" link
// for the position reported by the book in front and a "Next Page" link for the
// requested position behind it. A read failure is reported inside articleText instead
// of being propagated to the caller.
void EpwingDictionary::loadArticlePreviousPage(
string & articleHeadword, string & articleText, int & articlePage, int & articleOffset )
{
  QString headword, text;

  // Start from the requested position: if reading throws, getArticlePreviousPage()
  // never returns a position, and the "Previous Page" link below must not be built
  // from uninitialized members.
  EB_Position pos;
  pos.page = articlePage;
  pos.offset = articleOffset;

  try
  {
    Mutex::Lock _( eBook.getLibMutex() );
    pos = eBook.getArticlePreviousPage( headword, text, articlePage, articleOffset, false );
  }
  catch( const std::exception & e )
  {
    text = QString( "Article reading error: %1" ).arg( QString::fromUtf8( e.what() ) );
  }

  articleHeadword = string( headword.toUtf8().data() );
  articleText = string( text.toUtf8().data() );

  string prefix( "<div class=\"epwing_text\">" );
  string previousLink = epwing_previous_button(pos.page, pos.offset );
  articleText = prefix + previousLink + articleText;

  string nextLink = epwing_next_button( articlePage, articleOffset );
  articleText = articleText + nextLink;
  articleText = articleText + "</div>";
}
void EpwingDictionary::loadArticle( int articlePage,
int articleOffset,
string & articleHeadword,
@ -521,11 +599,7 @@ void EpwingArticleRequest::run()
try
{
dict.loadArticle( chain[ x ].articleOffset,
headword,
articleText,
articlePage,
articleOffset );
dict.loadArticle( chain[ x ].articleOffset, headword, articleText, articlePage, articleOffset );
}
catch(...)
{
@ -562,7 +636,11 @@ void EpwingArticleRequest::run()
getBuiltInArticle( alts[ x ], pages, offsets, alternateArticles );
}
if ( mainArticles.empty() && alternateArticles.empty() )
QRegularExpressionMatch m = RX::Epwing::refWord.match( gd::toQString( word ) );
bool ref = m.hasMatch();
if ( mainArticles.empty() && alternateArticles.empty() && !ref)
{
// No such word
finish();
@ -589,6 +667,26 @@ void EpwingArticleRequest::run()
result += i->second.second;
}
{
QRegularExpressionMatch m = RX::Epwing::refWord.match( gd::toQString( word ) );
if( m.hasMatch() )
{
string headword, articleText;
int articlePage = m.captured( 1 ).toInt();
int articleOffset = m.captured( 2 ).toInt();
if( word[ 0 ] =='r' )
dict.loadArticleNextPage( headword, articleText, articlePage, articleOffset );
else
{
//starts with p
dict.loadArticlePreviousPage( headword, articleText, articlePage, articleOffset );
}
result += articleText;
}
}
result += "</div>";
Mutex::Lock _( dataMutex );

View file

@ -14,7 +14,7 @@
#include "wstring_qt.hh"
#include "folding.hh"
#include "epwing_charmap.hh"
#include "htmlescape.hh"
#if defined( Q_OS_WIN32 ) || defined( Q_OS_MAC )
#define _FILE_OFFSET_BITS 64
#endif
@ -687,18 +687,7 @@ QString EpwingBook::getText( int page, int offset, bool text_only )
{
error_string.clear();
EB_Position pos;
pos.page = page;
pos.offset = offset;
currentPosition = pos;
EB_Error_Code ret = eb_seek_text(&book, &pos);
if( ret != EB_SUCCESS )
{
setErrorString( "eb_seek_text", ret );
currentPosition.page = 0;
throw exEbLibrary( error_string.toUtf8().data() );
}
seekBookThrow( page, offset );
QByteArray buf;
char buffer[ TextBufferSize + 1 ];
@ -710,7 +699,7 @@ QString EpwingBook::getText( int page, int offset, bool text_only )
for( ; ; )
{
ret = eb_read_text( &book, &appendix, &hookSet, &container,
EB_Error_Code ret = eb_read_text( &book, &appendix, &hookSet, &container,
TextBufferSize, buffer, &buffer_length );
if( ret != EB_SUCCESS )
@ -737,6 +726,129 @@ QString EpwingBook::getText( int page, int offset, bool text_only )
return text;
}
void EpwingBook::seekBookThrow( int page, int offset )
{
EB_Position pos;
pos.page = page;
pos.offset = offset;
currentPosition = pos;
EB_Error_Code ret = eb_seek_text( &book, &pos );
if( ret != EB_SUCCESS )
{
setErrorString( "eb_seek_text", ret );
currentPosition.page = 0;
throw exEbLibrary( error_string.toUtf8().data() );
}
}
// Reads text forward starting at (page, offset).
//
// Chunks are read with eb_read_text and the cursor is advanced with eb_forward_text
// until more than `total` bytes were collected or a read yields nothing. `pos`
// receives the position reported by eb_tell_text once reading stops; it falls back to
// the start position when that position cannot be determined or when the function
// returns early. Returns the finalized text, or an empty QString when the collected
// data exceeds TextSizeLimit. Throws exEbLibrary when seeking or forwarding fails.
QString EpwingBook::getTextWithLength( int page, int offset, int total, EB_Position & pos )
{
  error_string.clear();

  // Give the out-parameter a defined value for the early-return path below.
  pos.page = page;
  pos.offset = offset;

  int currentLength = 0;
  seekBookThrow( page, offset );

  QByteArray buf;
  char buffer[ TextBufferSize + 1 ];
  ssize_t buffer_length;

  EContainer container( this, false );
  prepareToRead();

  for( ;; )
  {
    EB_Error_Code ret = eb_read_text( &book, &appendix, &hookSet, &container, TextBufferSize, buffer, &buffer_length );
    if( ret != EB_SUCCESS )
    {
      setErrorString( "eb_read_text", ret );
      break;
    }

    buf += QByteArray( buffer, buffer_length );
    currentLength += buffer_length;
    if( currentLength > total || buffer_length == 0 )
      break;

    if( buf.length() > TextSizeLimit )
    {
      error_string = "Data too large";
      currentPosition.page = 0;
      return QString();
    }

    ret = eb_forward_text( &book, &appendix );
    if( ret != EB_SUCCESS )
    {
      // Report the call that actually failed (was mislabelled "eb_seek_text").
      setErrorString( "eb_forward_text", ret );
      currentPosition.page = 0;
      throw exEbLibrary( error_string.toUtf8().data() );
    }
  }

  const EB_Error_Code tellRet = eb_tell_text( &book, &pos );
  if( tellRet != EB_SUCCESS )
  {
    // Keep an earlier read error if there is one; otherwise record this failure.
    if( error_string.isEmpty() )
      setErrorString( "eb_tell_text", tellRet );
    pos.page = page;
    pos.offset = offset;
  }

  QString text = QString::fromUtf8( buf.data(), buf.size() ).trimmed();
  finalizeText( text );
  return text;
}
// Reads text backward starting from (page, offset).
//
// Each iteration seeks to the current position, steps back with eb_backward_text,
// records the new position in `pos` and prepends the chunk read there, until more than
// `total` bytes were collected or a read yields nothing. `pos` therefore ends up at the
// earliest position whose text was requested. Returns the finalized text, or an empty
// QString when the collected data exceeds TextSizeLimit. Throws exEbLibrary when
// seeking or stepping backward fails.
QString EpwingBook::getPreviousTextWithLength( int page, int offset, int total, EB_Position & pos )
{
  error_string.clear();

  // Give the out-parameter a defined value before the first eb_tell_text call.
  pos.page = page;
  pos.offset = offset;

  int currentLength = 0;
  QByteArray buf;
  char buffer[ TextBufferSize + 1 ];
  ssize_t buffer_length;

  EContainer container( this, false );
  prepareToRead();

  for( ;; )
  {
    seekBookThrow( page, offset );

    EB_Error_Code ret = eb_backward_text( &book, &appendix );
    if( ret != EB_SUCCESS )
    {
      setErrorString( "eb_backward_text", ret );
      currentPosition.page = 0;
      throw exEbLibrary( error_string.toUtf8().data() );
    }

    ret = eb_tell_text( &book, &pos );
    if( ret != EB_SUCCESS )
    {
      // Without a valid position the backward walk cannot continue; keep what was
      // collected so far and report the last known-good position.
      setErrorString( "eb_tell_text", ret );
      pos.page = page;
      pos.offset = offset;
      break;
    }
    page = pos.page;
    offset = pos.offset;

    ret = eb_read_text( &book, &appendix, &hookSet, &container, TextBufferSize, buffer, &buffer_length );
    if( ret != EB_SUCCESS )
    {
      setErrorString( "eb_read_text", ret );
      break;
    }

    buf.prepend( QByteArray( buffer, buffer_length ));
    currentLength += buffer_length;
    if( currentLength > total || buffer_length == 0 )
      break;

    if( buf.length() > TextSizeLimit )
    {
      error_string = "Data too large";
      currentPosition.page = 0;
      return QString();
    }
  }

  QString text = QString::fromUtf8( buf.data(), buf.size() ).trimmed();
  finalizeText( text );
  return text;
}
void EpwingBook::getReferencesFromText( int page, int offset )
{
error_string.clear();
@ -934,6 +1046,7 @@ bool EpwingBook::processRef( EpwingHeadword & head)
pos.page = epos.first;
pos.offset = epos.second;
// EPWING books use reference links for navigation; the headword at such a position is usually not meaningful.
if( readHeadword( pos, head.headword, true ) )
{
if( head.headword.isEmpty() || head.headword.contains( badLinks ) )
@ -944,13 +1057,16 @@ bool EpwingBook::processRef( EpwingHeadword & head)
head.page = pos.page;
head.offset = pos.offset;
auto key = ( (uint64_t)pos.page ) << 32 | ( pos.offset );
if( !allRefPositions.contains( key ) )
// Only a reference point that already exists (i.e. is present in allRefPositions) is added to the headword list as an additional headword of the form rxxxxAtxxxx.
// This makes it easier for loadArticle to recognize real reference links.
if( allRefPositions.contains( key ) )
{
// fixed the reference headword ,to avoid the headword collision with other entry .
//if(!allHeadwordPositions.contains(key))
head.headword = QString( "r%1At%2" ).arg( pos.page ).arg( pos.offset );
allRefPositions[ key ] = true;
//allRefPositions[ key ] = true;
try
{
@ -1080,6 +1196,10 @@ void EpwingBook::fixHeadword( QString & headword )
//if( isHeadwordCorrect( fixed ) )
// headword = fixed;
// Remove leading digits and whitespace.
QRegularExpression leadingNumAndSpace( R"(^[\d\s]+\b)" );
fixed.remove( leadingNumAndSpace );
headword = fixed;
}
@ -1087,28 +1207,30 @@ void EpwingBook::getArticle( QString & headword, QString & articleText,
int page, int offset, bool text_only)
{
error_string.clear();
char buffer[ TextBufferSize + 1 ];
EB_Position pos;
pos.page = page;
pos.offset = offset;
seekBookThrow( page, offset );
currentPosition = pos;
readHeadword( headword, text_only );
EB_Error_Code ret = eb_seek_text( &book, &pos );
if( ret != EB_SUCCESS )
{
setErrorString( "eb_seek_text", ret );
throw exEbLibrary( error_string.toUtf8().data() );
}
QString hw = Html::unescape( headword, true );
fixHeadword( hw );
auto parts = hw.split( QChar::Space, Qt::SkipEmptyParts );
articleText = getText( page, offset, text_only );
}
void EpwingBook::readHeadword(QString & headword, bool text_only)
{
EContainer container( this, text_only );
ssize_t length;
prepareToRead();
ret = eb_read_heading( &book, &appendix, &hookSet, &container,
TextBufferSize, buffer, &length );
char buffer[ TextBufferSize + 1 ];
EB_Error_Code ret = eb_read_heading( &book, &appendix, &hookSet, &container,
TextBufferSize, buffer, &length );
if( ret != EB_SUCCESS )
{
setErrorString( "eb_read_heading", ret );
@ -1120,8 +1242,35 @@ void EpwingBook::getArticle( QString & headword, QString & articleText,
if( text_only )
fixHeadword( headword );
}
articleText = getText( pos.page, pos.offset, text_only);
// Reads the headword at (page, offset) and then a chunk of text going forward from
// there. Returns the position reported after the chunk, which callers use as the
// start of the next page.
EB_Position EpwingBook::getArticleNextPage(
QString & headword, QString & articleText,
int page, int offset, bool text_only)
{
  // Number of bytes after which a page of text is considered complete.
  const int pageTextLength = 4000;

  error_string.clear();

  seekBookThrow( page, offset );
  readHeadword( headword, text_only );

  // getTextWithLength() can return early without writing its out-parameter, so start
  // from the requested position instead of returning uninitialized members.
  EB_Position pos;
  pos.page = page;
  pos.offset = offset;

  articleText = getTextWithLength( page, offset, pageTextLength, pos);
  return pos;
}
// Reads the headword at (page, offset) and then a chunk of text going backward from
// there. Returns the position of the earliest text read, which callers use as the
// anchor of the previous page.
EB_Position EpwingBook::getArticlePreviousPage(
QString & headword, QString & articleText, int page, int offset, bool text_only )
{
  // Number of bytes after which a page of text is considered complete.
  const int pageTextLength = 4000;

  error_string.clear();

  seekBookThrow( page, offset );
  readHeadword( headword, text_only );

  // Start from the requested position so the returned value is defined even if the
  // callee cannot determine a new position.
  EB_Position pos;
  pos.page = page;
  pos.offset = offset;

  articleText = getPreviousTextWithLength( page, offset, pageTextLength, pos );
  return pos;
}
const char * EpwingBook::beginDecoration( unsigned int code )

View file

@ -100,6 +100,9 @@ class EpwingBook
// Retrieve article text from dictionary
QString getText( int page, int offset, bool text_only);
// Seeks the text cursor to (page, offset) and records it as the current position;
// throws exEbLibrary when eb_seek_text fails.
void seekBookThrow( int page, int offset );
// Reads text forward from (page, offset) until more than `total` bytes were collected
// or a read yields nothing; `pos` receives the position reported by eb_tell_text
// once reading stops.
QString getTextWithLength( int page, int offset, int total, EB_Position & pos );
// Reads text backward from (page, offset), prepending each chunk, until more than
// `total` bytes were collected or a read yields nothing; `pos` receives the position
// reported by eb_tell_text after the last backward step.
QString getPreviousTextWithLength( int page, int offset, int total, EB_Position & pos );
unsigned int normalizeDecorationCode( unsigned int code );
@ -196,7 +199,11 @@ public:
// Retrieve article from dictionary
void getArticle( QString & headword, QString & articleText,
int page, int offset, bool text_only );
// Reads a heading with eb_read_heading; when text_only is set the headword is passed
// through fixHeadword(). NOTE(review): callers seek to the wanted position first -
// confirm that this is a precondition.
void readHeadword( QString & headword, bool text_only);
// Reads the headword at (page, offset) and a chunk of text going forward from there;
// returns the position reported after the chunk.
EB_Position getArticleNextPage( QString & headword, QString & articleText,
int page, int offset, bool text_only );
// Reads the headword at (page, offset) and a chunk of text going backward from there;
// returns the position reported for the earliest text read.
EB_Position getArticlePreviousPage( QString & headword, QString & articleText, int page, int offset, bool text_only );
const char * beginDecoration( unsigned int code );
const char * endDecoration( unsigned int code );