mirror of
https://github.com/xiaoyifang/goldendict-ng.git
synced 2024-11-27 23:34:06 +00:00
Merge pull request #206 from xiaoyifang/fix/zim
fix: zim dictionary auto refresh logic
This commit is contained in:
commit
276056c9e1
|
@ -167,10 +167,17 @@ QNetworkReply * ArticleNetworkAccessManager::getArticleReply( QNetworkRequest co
|
|||
|
||||
if(req.url().scheme()=="gdlookup"){
|
||||
QString path=url.path();
|
||||
if(!path.isEmpty()){
|
||||
Utils::Url::addQueryItem(url,"word",path.mid(1));
|
||||
url.setPath("");
|
||||
Utils::Url::addQueryItem(url,"group","1");
|
||||
if( !path.isEmpty() )
|
||||
{
|
||||
url.setPath( "" );
|
||||
QByteArray referer = req.rawHeader( "Referer" );
|
||||
QUrl refererUrl = QUrl::fromEncoded( referer );
|
||||
|
||||
Utils::Url::addQueryItem( url, "word", path.mid( 1 ) );
|
||||
if( Utils::Url::hasQueryItem( refererUrl, "group" ) )
|
||||
{
|
||||
Utils::Url::addQueryItem( url, "group", Utils::Url::queryItemValue( refererUrl, "group" ) );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -311,7 +318,7 @@ sptr< Dictionary::DataRequest > ArticleNetworkAccessManager::getResource(
|
|||
|
||||
bool ignoreDiacritics = Utils::Url::queryItemValue( url, "ignore_diacritics" ) == "1";
|
||||
|
||||
if ( groupIsValid && phrase.isValid() ) // Require group and phrase to be passed
|
||||
if ( phrase.isValid() ) // Require a valid phrase (group validity is not checked by this condition)
|
||||
return articleMaker.makeDefinitionFor( phrase, group, contexts, mutedDicts, QStringList(), ignoreDiacritics );
|
||||
}
|
||||
|
||||
|
|
|
@ -65,3 +65,4 @@ QRegularExpression Mdx::styleElment( "(<style[^>]*>)([\\w\\W]*?)(<\\/style>)",
|
|||
QRegularExpression::CaseInsensitiveOption);
|
||||
|
||||
|
||||
// Matches a single '.' or '/' character. The ZIM code shown in this diff passes it to
// QString::replace with an empty replacement to remove those characters from names and URLs.
QRegularExpression Zim::linkSpecialChar("[\\.\\/]");
|
||||
|
|
|
@ -45,6 +45,11 @@ public:
|
|||
static QRegularExpression styleElment;
|
||||
};
|
||||
|
||||
// Shared regular expressions for the ZIM dictionary code (referenced as RX::Zim::...).
class Zim{
|
||||
public:
|
||||
// Pattern for the characters removed from ZIM link names and URLs;
// the pattern string itself is supplied by the out-of-class definition.
static QRegularExpression linkSpecialChar;
|
||||
};
|
||||
|
||||
} // namespace RX
|
||||
|
||||
#endif // GLOBALREGEX_HH
|
||||
|
|
111
zim.cc
111
zim.cc
|
@ -42,6 +42,7 @@
|
|||
#include <map>
|
||||
#include <algorithm>
|
||||
#include <QtConcurrent>
|
||||
#include "base/globalregex.hh"
|
||||
|
||||
namespace Zim {
|
||||
|
||||
|
@ -838,7 +839,7 @@ string ZimDictionary::convert( const string & in )
|
|||
QString( "<body \\1" ) );
|
||||
|
||||
// pattern of img and script
|
||||
text.replace( QRegularExpression( "<\\s*(img|script)\\s+([^>]*)src=(\"|)(\\.\\.|)/" ),
|
||||
text.replace( QRegularExpression( "<\\s*(img|script)\\s+([^>]*)src=(\"|)(\\.\\./)*" ),
|
||||
QString( "<\\1 \\2src=\\3bres://%1/").arg( getId().c_str() ) );
|
||||
|
||||
// Fix links without '"'
|
||||
|
@ -874,54 +875,35 @@ string ZimDictionary::convert( const string & in )
|
|||
for( int i = list.size(); i < 5; i++ )
|
||||
list.append( QString() );
|
||||
|
||||
QString formatTag;
|
||||
QString tag = list[3]; // a url, ex: Precambrian_Chaotian.html
|
||||
if ( !list[4].isEmpty() ) // a title, ex: title="Precambrian/Chaotian"
|
||||
tag = list[4].split("\"")[1];
|
||||
|
||||
// Check type of links inside articles
|
||||
if( linksType == UNKNOWN && tag.indexOf( '/' ) >= 0 )
|
||||
{
|
||||
QString word = QUrl::fromPercentEncoding( tag.toLatin1() );
|
||||
QRegularExpression htmlRx( "\\.(s|)htm(l|)$", QRegularExpression::CaseInsensitiveOption );
|
||||
word.remove( htmlRx ).
|
||||
replace( "_", " " );
|
||||
|
||||
vector< WordArticleLink > links;
|
||||
links = findArticles( gd::toWString( word ) );
|
||||
|
||||
if( !links.empty() )
|
||||
{
|
||||
linksType = SLASH;
|
||||
}
|
||||
else
|
||||
{
|
||||
word.remove( QRegularExpression(".*/") );
|
||||
links = findArticles( gd::toWString( word ) );
|
||||
if( !links.empty() )
|
||||
{
|
||||
linksType = NO_SLASH;
|
||||
links.clear();
|
||||
}
|
||||
}
|
||||
tag = list[4];
|
||||
formatTag=tag.split("\"")[1];
|
||||
}
|
||||
else{
|
||||
//tag from list[3]
|
||||
formatTag = tag;
|
||||
}
|
||||
|
||||
if( linksType == SLASH || linksType == UNKNOWN )
|
||||
{
|
||||
tag.remove( QRegularExpression( "\\.(s|)htm(l|)$", QRegularExpression::PatternOption::CaseInsensitiveOption ) ).
|
||||
replace( "_", "%20" ).
|
||||
prepend( "<a href=\"gdlookup://localhost/" ).
|
||||
append( "\" " + list[4] + ">" );
|
||||
formatTag.replace(RX::Zim::linkSpecialChar,"");
|
||||
|
||||
vector< WordArticleLink > links;
|
||||
links = findArticles( gd::toWString( formatTag ) );
|
||||
|
||||
|
||||
QString urlLink = match.captured();
|
||||
QString replacedLink ;
|
||||
|
||||
if(!links.empty()){
|
||||
replacedLink = urlLink.replace(tag,"gdlookup://localhost/"+formatTag);
|
||||
}
|
||||
else
|
||||
{
|
||||
tag.remove( QRegularExpression(".*/") ).
|
||||
remove( QRegularExpression( "\\.(s|)htm(l|)$", QRegularExpression::PatternOption::CaseInsensitiveOption ) ).
|
||||
replace( "_", "%20" ).
|
||||
prepend( "<a href=\"gdlookup://localhost/" ).
|
||||
append( "\" " + list[4] + ">" );
|
||||
else{
|
||||
replacedLink = urlLink.replace(tag,"bres://localhost/"+formatTag);
|
||||
}
|
||||
|
||||
newText += tag;
|
||||
newText += replacedLink;
|
||||
}
|
||||
if( pos )
|
||||
{
|
||||
|
@ -1507,9 +1489,9 @@ void ZimResourceRequest::run()
|
|||
}
|
||||
|
||||
// Builds a ZimResourceRequest for the resource called `name`.
// NOTE(review): this view appears to interleave the removed and added lines of a diff hunk,
// so the first `return` below is presumably the pre-change line - confirm against the real file.
sptr< Dictionary::DataRequest > ZimDictionary::getResource( string const & name )
|
||||
|
||||
{
|
||||
return new ZimResourceRequest( *this, name );
|
||||
// Remove every match of RX::Zim::linkSpecialChar from the requested name before creating the
// request; the indexing code in makeDictionaries applies the same replacement to the URLs it
// adds to indexedResources.
auto formatedName = QString::fromStdString(name).replace(RX::Zim::linkSpecialChar,"");
|
||||
return new ZimResourceRequest( *this, formatedName.toStdString() );
|
||||
}
|
||||
|
||||
//} // anonymous namespace
|
||||
|
@ -1648,25 +1630,41 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
|
|||
|| ( mimetype == 0xFFFF && df.isArticleMime( redirected_mime ) ) ) ) )
|
||||
{
|
||||
wstring word;
|
||||
if( !title.empty() )
|
||||
word = Utf8::decode( title );
|
||||
else
|
||||
word = Utf8::decode( url );
|
||||
|
||||
if( df.isArticleMime( mimetype )
|
||||
|| ( mimetype == 0xFFFF && df.isArticleMime( redirected_mime ) ) )
|
||||
{
|
||||
if( maxHeadwordsToExpand && zh.articleCount >= maxHeadwordsToExpand )
|
||||
indexedWords.addSingleWord( word, n );
|
||||
{
|
||||
if( !title.empty() )
|
||||
{
|
||||
word = Utf8::decode( title );
|
||||
indexedWords.addSingleWord( word, n );
|
||||
}
|
||||
if( !url.empty() )
|
||||
{
|
||||
auto formatedUrl = QString::fromStdString( url ).replace( RX::Zim::linkSpecialChar, "" );
|
||||
indexedWords.addSingleWord( Utf8::decode( formatedUrl.toStdString() ), n );
|
||||
}
|
||||
}
|
||||
else
|
||||
indexedWords.addWord( word, n );
|
||||
{
|
||||
if( !title.empty() )
|
||||
{
|
||||
word = Utf8::decode( title );
|
||||
indexedWords.addWord( word, n );
|
||||
}
|
||||
if( !url.empty() )
|
||||
{
|
||||
auto formatedUrl = QString::fromStdString( url ).replace( RX::Zim::linkSpecialChar, "" );
|
||||
indexedWords.addWord( Utf8::decode( formatedUrl.toStdString() ), n );
|
||||
}
|
||||
}
|
||||
wordCount++;
|
||||
}
|
||||
else
|
||||
{
|
||||
url.insert( url.begin(), '/' );
|
||||
url.insert( url.begin(), nameSpace );
|
||||
indexedResources.addSingleWord( Utf8::decode( url ), n );
|
||||
auto formatedUrl = QString::fromStdString(url).replace(RX::Zim::linkSpecialChar,"");
|
||||
indexedResources.addSingleWord( Utf8::decode( formatedUrl.toStdString() ), n );
|
||||
}
|
||||
}
|
||||
else
|
||||
|
@ -1702,9 +1700,10 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
|
|||
}
|
||||
else
|
||||
{
|
||||
url.insert( url.begin(), '/' );
|
||||
url.insert( url.begin(), nameSpace );
|
||||
indexedResources.addSingleWord( Utf8::decode( url ), n );
|
||||
// url.insert( url.begin(), '/' );
|
||||
// url.insert( url.begin(), nameSpace );
|
||||
auto formatedUrl = QString::fromStdString(url).replace(RX::Zim::linkSpecialChar,"");
|
||||
indexedResources.addSingleWord( Utf8::decode( formatedUrl.toStdString() ), n );
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue