MDict: More optimize links handling

This commit is contained in:
Abs62 2016-11-13 16:33:22 +03:00
parent 404999c4e5
commit eff09c675a

131
mdx.cc
View file

@ -990,103 +990,122 @@ QString & MdxDictionary::filterResource( QString const & articleId, QString & ar
QString id = QString::fromStdString( getId() ); QString id = QString::fromStdString( getId() );
QString uniquePrefix = QString::fromLatin1( "g" ) + id + "_" + articleId + "_"; QString uniquePrefix = QString::fromLatin1( "g" ) + id + "_" + articleId + "_";
QRegExp allLinksRe("(<\\s*(a(?:rea)?|link|script|img)\\s+[^>]*>)", Qt::CaseInsensitive ); QRegExp allLinksRe( "(?:<\\s*(a(?:rea)?|img|link|script)(?:\\s+[^>]+|\\s*)>)", Qt::CaseInsensitive );
QRegExp wordCrossLink( "(href\\s*=\\s*[\"'])entry://([^#\"']+)(#?[^\"']*)", Qt::CaseInsensitive ); QRegExp wordCrossLink( "([\\s\"']href\\s*=)\\s*([\"'])entry://([^>#]*)((?:#[^>]*)?)\\2", Qt::CaseInsensitive );
QRegExp anchorLinkRe( "(<\\s*a\\s+[^>]*\\b(?:name|id)\\b\\s*=\\s*[\"']*)(?=[^\"'])", Qt::CaseInsensitive ); wordCrossLink.setMinimal( true );
anchorLinkRe.setMinimal( true );
QRegExp anchorLinkRe2( "(href\\s*=\\s*[\"'])entry://#", Qt::CaseInsensitive ); QRegExp anchorIdRe( "([\\s\"'](?:name|id)\\s*=)\\s*([\"'])\\s*(?=\\S)", Qt::CaseInsensitive );
QRegExp audioRe( "(<\\s*(?:a|area)\\s+[^>]*\\bhref\\b\\s*=\\s*\")sound://([^\"']*)", Qt::CaseInsensitive ); QRegExp anchorIdRe2( "([\\s\"'](?:name|id)\\s*=)\\s*(?=[^\"'])([^\\s\">]+)", Qt::CaseInsensitive );
QRegExp stylesRe( "(<\\s*link\\s+[^>]*\\bhref\\b\\s*=\\s*[\"']+)(?!bres:|data:)(?:file://)?[\\x00-\\x30\\x7f]*(?:/?)([^\"']*)", QRegExp anchorLinkRe( "([\\s\"']href\\s*=\\s*[\"'])entry://#", Qt::CaseInsensitive );
QRegExp audioRe( "([\\s\"']href\\s*=)\\s*([\"'])sound://([^\">]+)\\2", Qt::CaseInsensitive );
audioRe.setMinimal( true );
QRegExp stylesRe( "([\\s\"']href\\s*=)\\s*([\"'])(?!\\s*\\b(?:(?:bres|https?|ftp)://|(?:data|javascript):))(?:file://)?[\\x00-\\x1f\\x7f]*\\.*/?([^\">]+)\\2",
Qt::CaseInsensitive, QRegExp::RegExp2 ); Qt::CaseInsensitive, QRegExp::RegExp2 );
QRegExp stylesRe2( "(<\\s*link\\s+[^>]*\\bhref\\b\\s*=\\s*)(?!['\"]+)(?!bres:|data:)(?:file://)?(?:/?)([^\\s>]+)", stylesRe.setMinimal( true );
QRegExp stylesRe2( "([\\s\"']href\\s*=)\\s*(?![\\s\"']|\\b(?:(?:bres|https?|ftp)://|(?:data|javascript):))(?:file://)?[\\x00-\\x1f\\x7f]*\\.*/?([^\\s\">]+)",
Qt::CaseInsensitive, QRegExp::RegExp2 ); Qt::CaseInsensitive, QRegExp::RegExp2 );
QRegExp inlineScriptRe( "<\\s*script\\s*>", Qt::CaseInsensitive, QRegExp::RegExp2 ); QRegExp inlineScriptRe( "<\\s*script(?:(?=\\s)(?:(?![\\s\"']src\\s*=)[^>])+|\\s*)>", Qt::CaseInsensitive, QRegExp::RegExp2 );
QRegExp closeScriptTagRe( "<\\s*/script\\s*>", Qt::CaseInsensitive, QRegExp::RegExp2 ); QRegExp closeScriptTagRe( "<\\s*/script\\s*>", Qt::CaseInsensitive, QRegExp::RegExp2 );
QRegExp scriptRe( "(<\\s*script\\s+[^>]*\\bsrc\\b\\s*=\\s*[\"']+)(?!bres:|data:)(?:file://)?[\\x00-\\x30\\x7f]*(?:/?)([^\"']*)", QRegExp srcRe( "([\\s\"']src\\s*=)\\s*([\"'])(?!\\s*\\b(?:(?:bres|https?|ftp)://|(?:data|javascript):))(?:file://)?[\\x00-\\x1f\\x7f]*\\.*/?([^\">]+)\\2",
Qt::CaseInsensitive, QRegExp::RegExp2 ); Qt::CaseInsensitive, QRegExp::RegExp2 );
QRegExp scriptRe2( "(<\\s*script\\s+[^>]*\\bsrc\\b\\s*=\\s*)(?!['\"]+)(?!bres:|data:)(?:file://)?(?:/?)([^\\s>]+)", srcRe.setMinimal( true );
Qt::CaseInsensitive, QRegExp::RegExp2 ); QRegExp srcRe2( "([\\s\"']src\\s*=)\\s*(?![\\s\"']|\\b(?:(?:bres|https?|ftp)://|(?:data|javascript):))(?:file://)?[\\x00-\\x1f\\x7f]*\\.*/?([^\\s\">]+)",
QRegExp imgRe( "(<\\s*img\\s+[^>]*\\bsrc\\b\\s*=\\s*[\"']+)(?!bres:|data:|http:|https:)(?:file://)?[\\x00-\\x1f\\x7f]*(?:/?)([^\"']*)",
Qt::CaseInsensitive, QRegExp::RegExp2 );
QRegExp imgRe2( "(<\\s*img\\s+[^>]*\\bsrc\\b\\s*=\\s*)(?!['\"]+)(?!bres:|data:|http:|https:)(?:file://)?(?:/?)([^\\s>]+)",
Qt::CaseInsensitive, QRegExp::RegExp2 ); Qt::CaseInsensitive, QRegExp::RegExp2 );
int linkPos = 0; int linkPos = 0;
while( linkPos >= 0 ) while( linkPos >= 0 )
{ {
linkPos = article.indexOf( allLinksRe, linkPos ); linkPos = allLinksRe.indexIn( article, linkPos );
if( linkPos < 0 ) if( linkPos < 0 )
break; break;
QString linkTxt = allLinksRe.cap( 0 ); QString linkTxt = allLinksRe.cap( 0 );
QString linkType = allLinksRe.cap( 2 ); QString linkType = allLinksRe.cap( 1 ).toLower();
QString newLink; QString newLink;
if( !linkType.isEmpty() && linkType.at( 0 ).toLower() == 'a' ) if( !linkType.isEmpty() && linkType.at( 0 ) == 'a' )
{ {
newLink = linkTxt int pos = anchorIdRe.indexIn( linkTxt );
// anchors
.replace( anchorLinkRe, "\\1" + uniquePrefix )
.replace( anchorLinkRe2, "\\1#" + uniquePrefix )
// sounds, and audio link script
.replace( audioRe,
QString::fromStdString( addAudioLink( "\"gdau://" + getId() + "/\\2\"", getId() ) ) +
"\\1gdau://" + id + "/\\2" );
int pos = wordCrossLink.indexIn( newLink );
if( pos >= 0 ) if( pos >= 0 )
{ {
QString newTxt = wordCrossLink.cap( 1 ) QString newText = anchorIdRe.cap( 1 ) + anchorIdRe.cap( 2 ) + uniquePrefix;
newLink = linkTxt.replace( pos, anchorIdRe.cap().length(), newText );
}
else
newLink = linkTxt.replace( anchorIdRe2, "\\1\"" + uniquePrefix + "\\2\"" );
newLink = newLink.replace( anchorLinkRe, "\\1#" + uniquePrefix );
pos = audioRe.indexIn( newLink );
if( pos >= 0 )
{
// sounds and audio link script
QString newTxt = audioRe.cap( 1 ) + audioRe.cap( 2 )
+ "gdau://" + id + "/"
+ audioRe.cap( 3 ) + audioRe.cap( 2 );
newLink = QString::fromUtf8( addAudioLink( "\"gdau://" + getId() + "/" + audioRe.cap( 3 ).toUtf8().data() + "\"", getId() ).c_str() )
+ newLink.replace( pos, audioRe.cap().length(), newTxt );
}
pos = wordCrossLink.indexIn( newLink );
if( pos >= 0 )
{
QString newTxt = wordCrossLink.cap( 1 ) + wordCrossLink.cap( 2 )
+ "gdlookup://localhost/" + "gdlookup://localhost/"
+ wordCrossLink.cap( 2 ); + wordCrossLink.cap( 3 );
if( !wordCrossLink.cap( 3 ).isEmpty() ) if( !wordCrossLink.cap( 4 ).isEmpty() )
newTxt += QString( "?gdanchor=" ) + uniquePrefix + wordCrossLink.cap( 3 ).mid( 1 ); newTxt += QString( "?gdanchor=" ) + uniquePrefix + wordCrossLink.cap( 4 ).mid( 1 );
newLink.replace( pos, wordCrossLink.cap( 0 ).size(), newTxt ); newTxt += wordCrossLink.cap( 2 );
newLink.replace( pos, wordCrossLink.cap( 0 ).length(), newTxt );
} }
} }
else else
if( linkType.toLower().compare( "link" ) == 0 ) if( linkType.compare( "link" ) == 0 )
{ {
// stylesheets // stylesheets
newLink = linkTxt int pos = stylesRe.indexIn( linkTxt );
.replace( stylesRe, if( pos >= 0 )
"\\1bres://" + id + "/\\2" ) {
.replace( stylesRe2, QString newText = stylesRe.cap( 1 ) + stylesRe.cap( 2 )
+ "bres://" + id + "/"
+ stylesRe.cap( 3 ) + stylesRe.cap( 2 );
newLink = linkTxt.replace( pos, stylesRe.cap().length(), newText );
}
else
newLink = linkTxt.replace( stylesRe2,
"\\1\"bres://" + id + "/\\2\"" ); "\\1\"bres://" + id + "/\\2\"" );
} }
else else
if( linkType.toLower().compare( "script" ) == 0 ) if( linkType.compare( "script" ) == 0 || linkType.compare( "img" ) == 0 )
{ {
// javascripts // javascripts and images
if( inlineScriptRe.exactMatch( linkTxt ) ) if( linkType.at( 0 ) == 's' && inlineScriptRe.exactMatch( linkTxt ) )
{ {
// skip inline scripts // skip inline scripts
int pos = article.indexOf( closeScriptTagRe, linkPos ); linkPos += linkTxt.length();
linkPos += pos > 0 ? pos : linkTxt.length(); int pos = closeScriptTagRe.indexIn( article, linkPos );
if( pos > 0 )
linkPos = pos + closeScriptTagRe.cap().length();
continue; continue;
} }
else else
{ {
newLink = linkTxt int pos = srcRe.indexIn( linkTxt );
.replace( scriptRe, if( pos >= 0 )
"\\1bres://" + id + "/\\2" ) {
.replace( scriptRe2, QString newText = srcRe.cap( 1 ) + srcRe.cap( 2 )
"\\1\"bres://" + id + "/\\2\"" ); + "bres://" + id + "/"
} + srcRe.cap( 3 ) + srcRe.cap( 2 );
newLink = linkTxt.replace( pos, srcRe.cap().length(), newText );
} }
else else
if( linkType.toLower().compare( "img" ) == 0 ) newLink = linkTxt.replace( srcRe2,
{
// images
newLink = linkTxt
.replace( imgRe,
"\\1bres://" + id + "/\\2" )
.replace( imgRe2,
"\\1\"bres://" + id + "/\\2\"" ); "\\1\"bres://" + id + "/\\2\"" );
} }
}
if( !newLink.isEmpty() ) if( !newLink.isEmpty() )
{ {
article.replace( linkPos, allLinksRe.cap().length(), newLink ); article.replace( linkPos, allLinksRe.cap().length(), newLink );