From 6bf6dd46df2cd3832d544b508631fbbd0f8c36e4 Mon Sep 17 00:00:00 2001 From: jjzz Date: Wed, 7 Jan 2015 07:08:13 -0500 Subject: [PATCH] update ZimDictionary::convert(), proper display --- zim.cc | 105 +++++++++++++++++++++++++-------------------------------- 1 file changed, 45 insertions(+), 60 deletions(-) diff --git a/zim.cc b/zim.cc index abc88841..6c3088dd 100644 --- a/zim.cc +++ b/zim.cc @@ -597,93 +597,78 @@ string ZimDictionary::convert( const string & in ) { QString text = QString::fromUtf8( in.c_str() ); + // replace background text.replace( QRegExp( "<\\s*body\\s*([^>]*)background:([^;\"]*)" ), QString( "]*)src=(\"|)(\\.\\.|)/" ), QString( "<\\1 \\2src=\\3bres://%1/").arg( getId().c_str() ) ); // Fix links without '"' text.replace( QRegExp( "href=(\\.\\.|)/([^\\s>]+)" ), QString( "href=\"\\1/\\2\"" ) ); + // pattern text.replace( QRegExp( "<\\s*link\\s*([^>]*)href=\"(\\.\\.|)/" ), QString( "]*)href=\"(?!(http(s|)|ftp)://)(/|)[^\"]*\"\\s*title=\"([^\"]*)\"", + // localize the en.wiki***.com|org series links + text.replace( QRegExp( "<\\s*a\\s+(class=\"external\"\\s+)href=\"http(s|)://en\\.(wiki(pedia|books|news|quote|source|versity)|wiktionary)\\.(org|com)/wiki/" ), + QString( ", excluding any known protocols such as http://, mailto:, #(comment) + // these links will be translated into local definitions + QRegExp rxLink( "<\\s*a\\s+([^>]*)href=\"(?!(\\w+://|#|mailto:|tel:))(/|)([^\"]*)\"\\s*(title=\"[^\"]*\")?[^>]*>", Qt::CaseSensitive, QRegExp::RegExp2 ); - - QRegExp linkRegexp2( "<\\s*a\\s*([^>]*)href=\"(\\.\\.|)/([^\"]*)\"", - Qt::CaseSensitive, - QRegExp::RegExp2 ); - - QRegExp linkRegexp3( "\\.(s|)htm(l|)", Qt::CaseInsensitive ); - int pos = 0; - while( pos >= 0 ) + while( (pos = rxLink.indexIn( text, pos )) >= 0 ) { - pos = linkRegexp1.indexIn( text, pos ); - if( pos < 0 ) - break; + QStringList list = rxLink.capturedTexts(); + QString tag = list[3]; // a url, ex: Precambrian_Chaotian.html + if ( !list[4].isEmpty() ) // a title, ex: 
title="Precambrian/Chaotian" + tag = list[4].split("\"")[1]; - QStringList list = linkRegexp1.capturedTexts(); - - QString tag = QString( "" ); + text.replace( pos, list[0].length(), tag ); pos += tag.length() + 1; } + // Occasionally words need to be displayed vertically, but
were changed to somewhere + // proper style:
N
e
o
p
t
e
r
a
+ QRegExp rxBR( "(]*>)\\s*((\\w\\s*<br(\\\\|/|)>\\s*)+\\w)\\s*", + Qt::CaseSensitive, + QRegExp::RegExp2 ); pos = 0; - while( pos >= 0 ) + while( (pos = rxBR.indexIn( text, pos )) >= 0 ) { - pos = linkRegexp2.indexIn( text, pos ); - if( pos < 0 ) - break; - - QStringList list = linkRegexp2.capturedTexts(); - - QString tag = QString( "" ). + prepend( list[1] ). + append( "" ); + text.replace( pos, list[0].length(), tag ); pos += tag.length() + 1; } + // // output all links in the page - only for analysis + // QRegExp rxPrintAllLinks( "<\\s*a\\s+[^>]*href=\"[^\"]*\"[^>]*>", + // Qt::CaseSensitive, + // QRegExp::RegExp2 ); + // pos = 0; + // while( (pos = rxPrintAllLinks.indexIn( text, pos )) >= 0 ) + // { + // QStringList list = rxPrintAllLinks.capturedTexts(); + // qDebug() << "\n--Alllinks--" << list[0]; + // pos += list[0].length() + 1; + // } + // Fix outstanding elements text += "
";