goldendict-ng/article_maker.cc
shenleban tongying 540dda26ed cleanup: Replace all usages of qrcx://localhost with qrc://
The replacement command:
git grep -l 'qrcx://localhost' | xargs sed -i 's/qrcx:\/\/localhost/qrc:\/\//g'

The qrcx:// URL scheme was introduced in 2009 or earlier - it is present
in the first commit in GoldenDict's git history. Back then GoldenDict
supported Qt versions earlier than 4.6, in which
QWebSecurityOrigin::addLocalScheme() was introduced. Adding the qrc URL
scheme as local obsoletes the qrcx URL scheme. GoldenDict does not
compile against Qt versions earlier than 4.6, so there is no reason to
use this custom URL scheme anymore.

Co-authored-by:  Igor Kushnir <igorkuo@gmail.com>
2023-03-05 15:20:05 -05:00

1166 lines
34 KiB
C++

/* This file is (c) 2008-2012 Konstantin Isakov <ikm@goldendict.org>
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#include "article_maker.hh"
#include "config.hh"
#include "htmlescape.hh"
#include "utf8.hh"
#include "wstring_qt.hh"
#include <limits.h>
#include <QFile>
#include <QUrl>
#include <QTextDocumentFragment>
#include "folding.hh"
#include "langcoder.hh"
#include "gddebug.hh"
#include "utils.hh"
#include "globalbroadcaster.h"
using std::vector;
using std::string;
using gd::wstring;
using std::set;
using std::list;
// Creates an article maker over the given dictionaries and groups.
// Optional parts are expanded and big articles are collapsed by default,
// with a default collapse threshold of 500.
ArticleMaker::ArticleMaker( vector< sptr< Dictionary::Class > > const & dictionaries_,
                            vector< Instances::Group > const & groups_,
                            QString const & displayStyle_,
                            QString const & addonStyle_ ):
  dictionaries( dictionaries_ ),
  groups( groups_ ),
  displayStyle( displayStyle_ ),
  addonStyle( addonStyle_ ),
  needExpandOptionalParts( true ),
  collapseBigArticles( true ),
  articleLimitSize( 500 )
{
  // Nothing else to set up: all state lives in the initializer list.
}
// Replaces the display style and the add-on style used by subsequently
// generated page headers.
void ArticleMaker::setDisplayStyle( QString const & st, QString const & adst )
{
  addonStyle   = adst;
  displayStyle = st;
}
// Builds the opening part of an article page: the doctype, the whole <head>
// (scripts, stylesheets, title, optional favicon, optional Dark Reader setup)
// and the opening <body> tag.
//
// word                - text placed, HTML-escaped, into <title>.
// icon                - when non-empty, a favicon link to qrc:///flags/<icon>
//                       is emitted.
// expandOptionalParts - when true, inline CSS is added which displays
//                       .dsl_opt inline and hides .hidden_expand_opt.
//
// Returns the markup, which always ends with "</head><body>".
std::string ArticleMaker::makeHtmlHeader( QString const & word,
                                          QString const & icon,
                                          bool expandOptionalParts ) const
{
  string result =
    "<!DOCTYPE html>"
    "<html><head>"
    "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">";

  // add jquery
  {
    result += "<script type=\"text/javascript\" "
              "src=\"qrc:///scripts/jquery-3.6.0.slim.min.js\"></script>";

    // Release the global `$` and keep jQuery reachable as `$_$`.
    result += "<script> var $_$=$.noConflict(); </script>";

    //custom javascript
    result += R"(<script type="text/javascript" src="qrc:///scripts/gd-custom.js"></script>)";

    //iframe resizer javascript
    result += R"(<script type="text/javascript" src="qrc:///scripts/iframeResizer.min.js"></script>)";
  }

  // add qwebchannel
  {
    result += R"(<script type="text/javascript" src="qrc:///qtwebchannel/qwebchannel.js"></script>)";
  }

  // document ready ,init webchannel
  {
    result += "<script>"
              " $_$(document).ready( function ($){ "
              " console.log(\"webchannel ready...\"); "
              " new QWebChannel(qt.webChannelTransport, function(channel) { "
              " window.articleview = channel.objects.articleview; "
              " }); "
              " }); "
              "</script>";
  }

  // Add a css stylesheet
  {
    result += R"(<link href="qrc:///article-style.css" media="all" rel="stylesheet" type="text/css">)";

    if ( displayStyle.size() )
    {
      // Load an additional stylesheet
      QString displayStyleCssFile = QString("qrc:///article-style-st-%1.css").arg(displayStyle);
      result += "<link href=\"" + displayStyleCssFile.toStdString() +
                R"(" media="all" rel="stylesheet" type="text/css">)";
    }

    // The user's own stylesheet is inlined after the built-in ones.
    result += readCssFile(Config::getUserCssFileName() ,"all");

    if( !addonStyle.isEmpty() )
    {
      QString name = Config::getStylesDir() + addonStyle
                     + QDir::separator() + "article-style.css";
      result += readCssFile(name ,"all");
    }

    // Turn on/off expanding of article optional parts
    if( expandOptionalParts )
    {
      result += "<!-- Expand optional parts css -->\n";
      result += "<style type=\"text/css\" media=\"all\">\n";
      result += "\n.dsl_opt\n{\n display: inline;\n}\n\n.hidden_expand_opt\n{\n display: none;\n}\n";
      result += "</style>\n";
    }
  }

  // Add print-only css
  {
    result += R"(<link href="qrc:///article-style-print.css" media="print" rel="stylesheet" type="text/css">)";

    result += readCssFile(Config::getUserCssPrintFileName() ,"print");

    if( !addonStyle.isEmpty() )
    {
      QString name = Config::getStylesDir() + addonStyle
                     + QDir::separator() + "article-style-print.css";
      result += readCssFile(name ,"print");
    }
  }

  result += "<title>" + Html::escape( word.toStdString()) + "</title>";

  // This doesn't seem to be much of influence right now, but we'll keep
  // it anyway.
  if ( icon.size() )
    result += R"(<link rel="icon" type="image/png" href="qrc:///flags/)" + Html::escape( icon.toUtf8().data() ) + "\" />\n";

  // Expose the two translated tooltips to page scripts through tr(key).
  // The translations end up inside double-quoted JavaScript string literals,
  // so they are escaped for JavaScript: a translation containing a quote or a
  // backslash would otherwise break the whole script element.
  result += "<script type=\"text/javascript\">"
            "function tr(key) {"
            " var tr_map = {"
            "\"Expand article\":\"";
  result += Html::escapeForJavaScript( tr("Expand article").toUtf8().data() );
  result += R"(","Collapse article":")";
  result += Html::escapeForJavaScript( tr("Collapse article").toUtf8().data() );
  result += "\" };"
            "return tr_map[key] || '';"
            "}"
            "</script>";

  result+= R"(<script type="text/javascript" src="qrc:///scripts/gd-builtin.js"></script>)";

  if( GlobalBroadcaster::instance()->getPreference()->darkReaderMode )
  {
    // #242525 because Darkreader will invert pure white to this value
    // NOTE: the raw string below is emitted verbatim into the page, so its
    // contents (including the embedded JavaScript comments) must not be
    // re-indented or reworded.
    result += R"(
<script src="qrc:///scripts/darkreader.js"></script>
<style>
body { background: #242525; }
.gdarticle { background: initial;}
</style>
<script>
// This function returns a promise, but it is synchroneous because it does not use await
function fetchShim(src) {
if (src.startsWith('gdlookup://')) {
// See https://github.com/xiaoyifang/goldendict/issues/363
console.error('Dark Reader discovered unexpected URL', src);
return Promise.resolve({blob: () => new Blob()});
}
if (src.startsWith('qrcx://') || src.startsWith('qrc://')) {
// This is a resource URL, need to fetch and transform
return new Promise((resolve) => {
const img = document.createElement('img');
img.addEventListener('load', () => {
// Set willReadFrequently to true to tell engine to store data in RAM-backed buffer and not on GPU
const canvas = document.createElement('canvas', {willReadFrequently: true});
canvas.width = img.naturalWidth;
canvas.height = img.naturalHeight;
const ctx = canvas.getContext('2d');
ctx.drawImage(img, 0, 0);
canvas.toBlob((blob) => {
resolve({blob: () => blob});
});
}, false);
img.src = src;
});
}
// This is a standard URL, can fetch it directly
return fetch(src);
}
DarkReader.setFetchMethod(fetchShim);
DarkReader.enable({
brightness: 100,
contrast: 90,
sepia: 10
});
</script>
)";
  }

  result += "</head><body>";

  return result;
}
// Reads a CSS file and wraps its contents into an inline <style> element
// restricted to the given media type. Returns an empty string when the file
// cannot be opened or has no content.
std::string ArticleMaker::readCssFile(QString const & fileName, std::string media) const
{
  QFile cssFile( fileName );
  if ( !cssFile.open( QFile::ReadOnly ) )
    return std::string();

  QByteArray const contents = cssFile.readAll();
  if ( contents.isEmpty() )
    return std::string();

  std::string styleBlock;
  styleBlock += "<!-- Addon style css -->\n";
  styleBlock += R"(<style type="text/css" media=")" + media + "\">\n";
  styleBlock += contents.constData();
  styleBlock += "</style>\n";
  return styleBlock;
}
// Produces the "nothing was found" paragraph shown when no dictionary in the
// group had a translation.
//
// word  - the looked-up text; when empty, the generic message without the
//         word is used.
// group - the group name to mention in the message.
//
// Both values are HTML-escaped before being substituted into the translated
// message. Returns a <div class="gdnotfound"> fragment.
std::string ArticleMaker::makeNotFoundBody( QString const & word,
                                            QString const & group )
{
  string result( "<div class=\"gdnotfound\"><p>" );

  QString const escapedGroup =
    QString::fromUtf8( Html::escape( group.toUtf8().data() ).c_str() );

  if ( word.size() )
  {
    QString str( word );

    if( str.isRightToLeft() )
    {
      // Wrap the word into a right-to-left *embedding* so it keeps its own
      // direction inside the surrounding sentence. U+202B is RLE; U+202E
      // would be RLO (override), which also reverses digits and Latin text.
      str.insert( 0, QChar( 0x202B ) ); // RLE, Right-to-Left Embedding
      str.append( QChar( 0x202C ) ); // PDF, POP DIRECTIONAL FORMATTING
    }

    QString const escapedWord =
      QString::fromUtf8( Html::escape( str.toUtf8().data() ).c_str() );

    // Both placeholders are substituted in a single pass: with chained
    // arg() calls a "%2" occurring inside the looked-up word would be
    // replaced by the group name as well.
    result += tr( "No translation for <b>%1</b> was found in group <b>%2</b>." ).
                arg( escapedWord, escapedGroup ).
                toUtf8().data();
  }
  else
    result += tr( "No translation was found in group <b>%1</b>." ).
                arg( escapedGroup ).
                toUtf8().data();

  result += "</p></div>";

  return result;
}
// Creates the request which produces the article page for a phrase.
//
// Three cases are handled, in this order:
//  1. dictIDs is non-empty: only the dictionaries with those ids are queried,
//     with no group name, no size limit and optional parts expanded.
//  2. groupId is the help group: one of the built-in help pages is returned,
//     or a "not found" page for any other phrase.
//  3. Otherwise the dictionaries of the matching group (or all dictionaries
//     when no group matches) are queried, minus the muted ones.
sptr< Dictionary::DataRequest > ArticleMaker::makeDefinitionFor(
  Config::InputPhrase const & phrase, unsigned groupId,
  QMap< QString, QString > const & contexts,
  QSet< QString > const & mutedDicts,
  QStringList const & dictIDs , bool ignoreDiacritics ) const
{
  if( !dictIDs.isEmpty() )
  {
    QStringList pendingIds = dictIDs;
    std::vector< sptr< Dictionary::Class > > ftsDicts;

    // Pick the requested dictionaries, preserving the global dictionary
    // order; every id is consumed once it has been matched.
    for( auto const & dict : dictionaries )
    {
      auto const matchAt = pendingIds.indexOf( QString::fromStdString( dict->getId() ) );
      if( matchAt >= 0 )
      {
        ftsDicts.push_back( dict );
        pendingIds.removeAt( matchAt );
      }

      if( pendingIds.isEmpty() )
        break;
    }

    string header = makeHtmlHeader( phrase.phrase, QString(), true );

    return std::make_shared<ArticleRequest>( phrase, "",
                                             contexts, ftsDicts, header,
                                             -1, true );
  }

  if ( groupId == Instances::Group::HelpGroupId )
  {
    // This is a special group containing internal welcome/help pages
    string page = makeHtmlHeader( phrase.phrase, QString(), needExpandOptionalParts );

    if ( phrase.phrase == tr( "Welcome!" ) )
    {
      page += tr(
        "<h3 align=\"center\">Welcome to <b>GoldenDict</b>!</h3>"
        "<p>To start working with the program, first visit <b>Edit|Dictionaries</b> to add some directory paths where to search "
        "for the dictionary files, set up various Wikipedia sites or other sources, adjust dictionary order or create dictionary groups."
        "<p>And then you're ready to look up your words! You can do that in this window "
        "by using a pane to the left, or you can <a href=\"Working with popup\">look up words from other active applications</a>. "
        "<p>To customize program, check out the available preferences at <b>Edit|Preferences</b>. "
        "All settings there have tooltips, be sure to read them if you are in doubt about anything."
        "<p>Should you need further help, have any questions, "
        "suggestions or just wonder what the others think, you are welcome at the program's <a href=\"https://github.com/xiaoyifang/goldendict/discussions\">forum</a>."
        "<p>Check program's <a href=\"https://github.com/xiaoyifang/goldendict\">website</a> for the updates. "
        "<p>(c) 2008-2013 Konstantin Isakov. Licensed under GPLv3 or later."
        ).toUtf8().data();
    }
    else if ( phrase.phrase == tr( "Working with popup" ) )
    {
      // The second paragraph differs between Windows and the other platforms.
      page += ( tr( "<h3 align=\"center\">Working with the popup</h3>"
                    "To look up words from other active applications, you would need to first activate the <i>\"Scan popup functionality\"</i> in <b>Preferences</b>, "
                    "and then enable it at any time either by triggering the 'Popup' icon above, or "
                    "by clicking the tray icon down below with your right mouse button and choosing so in the menu you've popped. " ) +
#ifdef Q_OS_WIN32
                tr( "Then just stop the cursor over the word you want to look up in another application, "
                    "and a window would pop up which would describe it to you." )
#else
                tr( "Then just select any word you want to look up in another application by your mouse "
                    "(double-click it or swipe it with mouse with the button pressed), "
                    "and a window would pop up which would describe the word to you." )
#endif
              ).toUtf8().data();
    }
    else
    {
      // Not found
      return makeNotFoundTextFor( phrase.phrase, "help" );
    }

    page += "</body></html>";

    // Hand the finished page over as an already-completed request.
    sptr< Dictionary::DataRequestInstant > instant = std::make_shared<Dictionary::DataRequestInstant>( true );
    auto & buffer = instant->getData();
    buffer.resize( page.size() );
    memcpy( &( buffer.front() ), page.data(), page.size() );

    return instant;
  }

  // Find the given group
  Instances::Group const * activeGroup = 0;

  for( auto const & candidate : groups )
  {
    if ( candidate.id == groupId )
    {
      activeGroup = &candidate;
      break;
    }
  }

  // If we've found a group, use its dictionaries; otherwise, use the global
  // heap.
  std::vector< sptr< Dictionary::Class > > const & activeDicts =
    activeGroup ? activeGroup->dictionaries : dictionaries;

  string header = makeHtmlHeader( phrase.phrase,
                                  activeGroup && activeGroup->icon.size() ?
                                    activeGroup->icon : QString(),
                                  needExpandOptionalParts );

  // When some dictionaries are muted, query a filtered copy of the list.
  bool const haveMuted = mutedDicts.size();
  std::vector< sptr< Dictionary::Class > > unmutedDicts;

  if ( haveMuted )
  {
    unmutedDicts.reserve( activeDicts.size() );

    for( auto const & dict : activeDicts )
    {
      if ( !mutedDicts.contains( QString::fromStdString( dict->getId() ) ) )
        unmutedDicts.push_back( dict );
    }
  }

  std::vector< sptr< Dictionary::Class > > const & dictsToQuery =
    haveMuted ? unmutedDicts : activeDicts;

  return std::make_shared<ArticleRequest>( phrase, activeGroup ? activeGroup->name : "",
                                           contexts, dictsToQuery, header,
                                           collapseBigArticles ? articleLimitSize : -1,
                                           needExpandOptionalParts, ignoreDiacritics );
}
// Returns an already-finished request holding a complete page which says
// that the given word was not found in the given group.
sptr< Dictionary::DataRequest > ArticleMaker::makeNotFoundTextFor(
  QString const & word, QString const & group ) const
{
  string page = makeHtmlHeader( word, QString(), true ) + makeNotFoundBody( word, group ) +
                "</body></html>";

  sptr< Dictionary::DataRequestInstant > instant = std::make_shared<Dictionary::DataRequestInstant>( true );

  auto & buffer = instant->getData();
  buffer.resize( page.size() );
  memcpy( &( buffer.front() ), page.data(), page.size() );

  return instant;
}
// Returns an already-finished request holding a page with the standard
// header, the title "(untitled)" and an empty body.
sptr< Dictionary::DataRequest > ArticleMaker::makeEmptyPage() const
{
  string page = makeHtmlHeader( tr( "(untitled)" ), QString(), true ) +
                "</body></html>";

  sptr< Dictionary::DataRequestInstant > instant =
    std::make_shared<Dictionary::DataRequestInstant>( true );

  auto & buffer = instant->getData();
  buffer.resize( page.size() );
  memcpy( &( buffer.front() ), page.data(), page.size() );

  return instant;
}
// Returns an already-finished request holding a page which shows a single
// picture. The picture is wrapped into a link which goes one step back in
// the browsing history when there is something to go back to.
//
// url - address of the picture; it is HTML-escaped before being placed into
//       the src attribute, so that quotes or angle brackets inside it cannot
//       terminate the attribute and inject markup into the page.
sptr< Dictionary::DataRequest > ArticleMaker::makePicturePage( string const & url ) const
{
  string result = makeHtmlHeader( tr( "(picture)" ), QString(), true )
                  + "<a href=\"javascript: if(history.length>2) history.go(-1)\">"
                  + "<img src=\"" + Html::escape( url ) + "\" /></a>"
                  + "</body></html>";

  sptr< Dictionary::DataRequestInstant > r =
    std::make_shared<Dictionary::DataRequestInstant>( true );

  r->getData().resize( result.size() );
  memcpy( &( r->getData().front() ), result.data(), result.size() );

  return r;
}
void ArticleMaker::setExpandOptionalParts( bool expand )
{
needExpandOptionalParts = expand;
}
// Updates the article collapsing settings: whether big articles are
// collapsed automatically, and the size limit above which that happens.
void ArticleMaker::setCollapseParameters( bool autoCollapse, int articleSize )
{
  articleLimitSize    = articleSize;
  collapseBigArticles = autoCollapse;
}
// If fileName does not name an existing file, retries it relative to the
// configuration directory (with that directory's last character chopped
// off). On success fileName is replaced by the canonical path and true is
// returned; in every other case fileName is left alone and false is returned.
bool ArticleMaker::adjustFilePath( QString & fileName )
{
  QFileInfo candidate( fileName );

  // The name already points at a file: nothing to adjust.
  if( candidate.isFile() )
    return false;

  QString configDir = Config::getConfigDir();
  configDir.chop( 1 );

  candidate.setFile( configDir + fileName);
  if( !candidate.isFile() )
    return false;

  fileName = candidate.canonicalFilePath();
  return true;
}
//////// ArticleRequest
// Starts an article request: stores the ready-made page header as the first
// chunk of data and launches a synonym (main form) search in every active
// dictionary. The article bodies are requested once those searches are done.
ArticleRequest::ArticleRequest(
  Config::InputPhrase const & phrase, QString const & group_,
  QMap< QString, QString > const & contexts_,
  vector< sptr< Dictionary::Class > > const & activeDicts_,
  string const & header,
  int sizeLimit, bool needExpandOptionalParts_, bool ignoreDiacritics_ ):
    word( phrase.phrase ), group( group_ ), contexts( contexts_ ),
    activeDicts( activeDicts_ ),
    altsDone( false ), bodyDone( false ), foundAnyDefinitions( false ),
    closePrevSpan( false ),
    articleSizeLimit( sizeLimit ),
    needExpandOptionalParts( needExpandOptionalParts_ ),
    ignoreDiacritics( ignoreDiacritics_ )
{
  // The phrase together with its punctuation suffix is an alternative form.
  if ( !phrase.punctuationSuffix.isEmpty() )
    alts.insert( gd::toWString( phrase.phraseWithSuffix() ) );

  // No need to lock dataMutex on construction
  hasAnyData = true;

  data.resize( header.size() );
  memcpy( &data.front(), header.data(), header.size() );

  //clear founded dicts.
  emit GlobalBroadcaster::instance()->dictionaryClear( ActiveDictIds{word} );

  // Accumulate main forms
  wstring const wordToSearch = gd::toWString( word );
  for( auto const & dict : activeDicts )
  {
    sptr< Dictionary::WordSearchRequest > search = dict->findHeadwordsForSynonym( wordToSearch );

    connect( search.get(), &Dictionary::Request::finished, this, &ArticleRequest::altSearchFinished, Qt::QueuedConnection );

    altSearches.push_back( search );
  }

  altSearchFinished(); // Handle any ones which have already finished
}
// Slot invoked whenever one of the synonym searches finishes (and once from
// the constructor). Collects the results of every finished search; when none
// is left pending, requests the article body from every active dictionary
// and hands over to bodyFinished().
void ArticleRequest::altSearchFinished()
{
  if ( altsDone )
    return;

  // Check every request for finishing
  for( list< sptr< Dictionary::WordSearchRequest > >::iterator i =
         altSearches.begin(); i != altSearches.end(); )
  {
    if ( (*i)->isFinished() )
    {
      // This one's finished
      for( size_t count = (*i)->matchesCount(), x = 0; x < count; ++x )
        alts.insert( (**i)[ x ].word );

      i = altSearches.erase( i );
    }
    else
      ++i;
  }

  if ( altSearches.empty() )
  {
#ifdef QT_DEBUG
    qDebug( "alts finished" );
#endif

    // They all've finished! Now we can look up bodies

    altsDone = true; // So any pending signals in queued mode won't mess us up

    vector< wstring > altsVector( alts.begin(), alts.end() );

#ifdef QT_DEBUG
    for( unsigned x = 0; x < altsVector.size(); ++x )
    {
      qDebug() << "Alt:" << gd::toQString( altsVector[ x ] );
    }
#endif

    wstring wordStd = gd::toWString( word );

    if( activeDicts.size() <= 1 )
      articleSizeLimit = -1; // Don't collapse article if only one dictionary presented

    for( unsigned x = 0; x < activeDicts.size(); ++x )
    {
      try
      {
        sptr< Dictionary::DataRequest > r =
          activeDicts[ x ]->getArticle( wordStd, altsVector,
                                        gd::toWString( contexts.value( QString::fromStdString( activeDicts[ x ]->getId() ) ) ),
                                        ignoreDiacritics );

        connect( r.get(), &Dictionary::Request::finished, this, &ArticleRequest::bodyFinished, Qt::QueuedConnection );

        bodyRequests.push_back( r );
      }
      catch( std::exception const & e )
      {
        gdWarning( "getArticle request error (%s) in \"%s\"\n",
                   e.what(), activeDicts[ x ]->getName().c_str() );

        // bodyFinished() derives the dictionary of a request from its
        // position (activeDicts.size() - bodyRequests.size()), so every
        // dictionary must own exactly one entry in bodyRequests. Without a
        // stand-in here, the articles of the remaining dictionaries would be
        // attributed to the wrong dictionary.
        // NOTE(review): relies on DataRequestInstant( false ) being an
        // already finished request which reports no data - confirm against
        // the Dictionary header.
        bodyRequests.push_back( std::make_shared< Dictionary::DataRequestInstant >( false ) );
      }
    }

    bodyFinished(); // Handle any ones which have already finished
  }
}
// Searches str from pos for the "</div>" which closes the current <div>,
// stepping over nested "<div " ... "</div>" pairs recursively. Returns the
// position just past that closing tag, or a value <= 0 when no closing tag
// could be located.
int ArticleRequest::findEndOfCloseDiv( const QString &str, int pos )
{
  while( true )
  {
    int const closeAt = str.indexOf( "</div>", pos );
    if( closeAt <= 0 )
      return closeAt;

    int const openAt = str.indexOf( "<div ", pos );

    // A nested <div> only matters when it starts before the closing tag.
    bool const nestedComesFirst = openAt > 0 && openAt <= closeAt;
    if( !nestedComesFirst )
      return closeAt + 6; // 6 == length of "</div>"

    // Skip the nested element and keep looking after its end.
    pos = findEndOfCloseDiv( str, openAt + 1 );
    if( pos <= 0 )
      return pos;
  }
}
// Slot invoked whenever one of the article body requests finishes (and once
// from altSearchFinished()). Requests are consumed strictly in order: as long
// as the first pending request is finished, its article - wrapped into the
// per-dictionary markup - is appended to the page data. When no request is
// left, the page is closed, or, if nothing was found at all, the "not found"
// message is appended and a stemmed search is started.
void ArticleRequest::bodyFinished()
{
  if ( bodyDone )
    return;

  GD_DPRINTF( "some body finished" );

  bool wasUpdated = false;

  // Ids of the dictionaries whose articles were appended during this call.
  QStringList dictIds;

  while ( bodyRequests.size() )
  {
    // Since requests should go in order, check the first one first
    if ( bodyRequests.front()->isFinished() )
    {
      // Good

      GD_DPRINTF( "one finished." );

      Dictionary::DataRequest & req = *bodyRequests.front();

      QString errorString = req.getErrorString();

      if ( req.dataSize() >= 0 || errorString.size() )
      {
        // The dictionary is derived from the request's position, which
        // assumes bodyRequests holds one entry per active dictionary.
        sptr< Dictionary::Class > const & activeDict =
          activeDicts[ activeDicts.size() - bodyRequests.size() ];

        string dictId = activeDict->getId();

        dictIds << QString::fromStdString(dictId);

        string head;

        string gdFrom = "gdfrom-" + Html::escape( dictId );

        if ( closePrevSpan )
        {
          head += R"(</div></div><div style="clear:both;"></div><span class="gdarticleseparator"></span>)";
        }

        bool collapse = false;
        if( articleSizeLimit >= 0 )
        {
          try
          {
            Mutex::Lock _( dataMutex );
            QString text = QString::fromUtf8( req.getFullData().data(), req.getFullData().size() );

            if( !needExpandOptionalParts )
            {
              // Strip DSL optional parts
              int pos = 0;
              for( ; ; )
              {
                pos = text.indexOf( "<div class=\"dsl_opt\"" );

                // indexOf() reports "not found" as -1; an optional part at
                // the very beginning of the article (index 0) is a valid
                // match and has to be stripped as well.
                if( pos >= 0 )
                {
                  int endPos = findEndOfCloseDiv( text, pos + 1 );
                  if( endPos > pos)
                    text.remove( pos, endPos - pos );
                  else
                    break;
                }
                else
                  break;
              }
            }

            int size = htmlTextSize( text );
            if( size > articleSizeLimit )
              collapse = true;
          }
          catch(...)
          {
            // Best effort only: when the size cannot be measured, the
            // article is simply left expanded.
          }
        }

        string jsVal = Html::escapeForJavaScript( dictId );

        head += string( "<div class=\"gdarticle" ) +
                ( closePrevSpan ? "" : " gdactivearticle" ) +
                ( collapse ? " gdcollapsedarticle" : "" ) +
                "\" id=\"" + gdFrom +
                "\" onClick=\"gdMakeArticleActive( '" + jsVal + "' );\" " +
                " onContextMenu=\"gdMakeArticleActive( '" + jsVal + "' );\""
                + ">";

        closePrevSpan = true;

        // The id is passed to gdExpandArticle() inside a JavaScript string
        // literal, so the JavaScript-escaped form is used, exactly as for the
        // gdMakeArticleActive() handlers above.
        head += string( R"(<div class="gddictname" onclick="gdExpandArticle(')" ) + jsVal + "\');"
                + ( collapse ? "\" style=\"cursor:pointer;" : "" )
                + "\" id=\"gddictname-" + Html::escape( dictId ) + "\""
                + ( collapse ? string( " title=\"" ) + tr( "Expand article" ).toUtf8().data() + "\"" : "" )
                + R"(><span class="gddicticon"><img src="gico://)" + Html::escape( dictId )
                + R"(/dicticon.png"></span><span class="gdfromprefix">)" +
                Html::escape( tr( "From " ).toUtf8().data() ) + "</span><span class=\"gddicttitle\">" +
                Html::escape( activeDict->getName().c_str() ) + "</span>"
                + R"(<span class="collapse_expand_area"><img src="qrc:///icons/blank.png" class=")"
                + ( collapse ? "gdexpandicon" : "gdcollapseicon" )
                + "\" id=\"expandicon-" + Html::escape( dictId ) + "\""
                + ( collapse ? "" : string( " title=\"" ) + tr( "Collapse article" ).toUtf8().data() + "\"" )
                + "></span>" + "</div>";

        head += "<div class=\"gddictnamebodyseparator\"></div>";

        head += "<div class=\"gdarticlebody gdlangfrom-";
        head += LangCoder::intToCode2( activeDict->getLangFrom() ).toLatin1().data();
        head += "\" lang=\"";
        head += LangCoder::intToCode2( activeDict->getLangTo() ).toLatin1().data();
        head += "\"";
        head += " style=\"display:";
        head += collapse ? "none" : "inline";
        head += string( "\" id=\"gdarticlefrom-" ) + Html::escape( dictId ) + "\">";

        if ( errorString.size() )
        {
          head += "<div class=\"gderrordesc\">" +
                  Html::escape( tr( "Query error: %1" ).arg( errorString ).toUtf8().data() )
                  + "</div>";
        }

        // Append the wrapper markup followed by the article body itself.
        Mutex::Lock _( dataMutex );

        size_t offset = data.size();

        data.resize( data.size() + head.size() + ( req.dataSize() > 0 ? req.dataSize() : 0 ) );

        memcpy( &data.front() + offset, head.data(), head.size() );

        try
        {
          if ( req.dataSize() > 0 )
            bodyRequests.front()->getDataSlice( 0, req.dataSize(),
                                                &data.front() + offset + head.size() );
        }
        catch( std::exception & e )
        {
          gdWarning( "getDataSlice error: %s\n", e.what() );
        }

        wasUpdated = true;

        foundAnyDefinitions = true;
      }
      GD_DPRINTF( "erasing.." );
      bodyRequests.pop_front();
      GD_DPRINTF( "erase done.." );
    }
    else
    {
      GD_DPRINTF( "one not finished." );
      break;
    }
  }

  if ( bodyRequests.empty() )
  {
    // No requests left, end the article

    bodyDone = true;

    {
      string footer;

      if ( closePrevSpan )
      {
        footer += "</div></div>";
        closePrevSpan = false;
      }

      if ( !foundAnyDefinitions )
      {
        // No definitions were ever found, say so to the user.

        // Larger words are usually whole sentences - don't clutter the output
        // with their full bodies.
        footer += ArticleMaker::makeNotFoundBody( word.size() < 40 ? word : "", group );

        // When there were no definitions, we run stemmed search.
        stemmedWordFinder = std::make_shared<WordFinder>( this );

        connect( stemmedWordFinder.get(),
                 &WordFinder::finished,
                 this,
                 &ArticleRequest::stemmedSearchFinished,
                 Qt::QueuedConnection );

        stemmedWordFinder->stemmedMatch( word, activeDicts );
      }
      else
      {
        footer += "</body></html>";
      }

      Mutex::Lock _( dataMutex );

      size_t offset = data.size();

      data.resize( data.size() + footer.size() );

      memcpy( &data.front() + offset, footer.data(), footer.size() );
    }

    if ( stemmedWordFinder.get() )
    {
      // The stemmed search goes on, so the request is updated, not finished.
      update();
      qDebug() << "send dicts(stemmed):" << word << ":" << dictIds;
      emit GlobalBroadcaster::instance()->dictionaryChanges(ActiveDictIds{word, dictIds});
      dictIds.clear();
    }
    else {
      finish();
      qDebug() << "send dicts(finished):" << word << ":" << dictIds;
      emit GlobalBroadcaster::instance()->dictionaryChanges(ActiveDictIds{word, dictIds});
      dictIds.clear();
    }
  } else if (wasUpdated) {
    update();
    qDebug() << "send dicts(updated):" << word << ":" << dictIds;
    emit GlobalBroadcaster::instance()->dictionaryChanges(ActiveDictIds{word, dictIds});
    dictIds.clear();
  }
}
// Estimates the size of an article as the length of its plain text.
// Articles containing an <iframe> (website dictionaries) are not measured
// and always report the fixed value 1000.
int ArticleRequest::htmlTextSize( QString html )
{
  // website dictionary.
  QRegularExpression const iframeTag( "<iframe\\s*[^>]*>", QRegularExpression::CaseInsensitiveOption );
  if( html.contains( iframeTag ) )
  {
    //arbitary number;
    return 1000;
  }

  // <link> and <script> elements are removed before the text conversion.
  //https://bugreports.qt.io/browse/QTBUG-102757
  QRegularExpression const linkTag( "<link\\s*[^>]*>", QRegularExpression::CaseInsensitiveOption );
  QRegularExpression const scriptElement( R"(<script[\s\S]*?>[\s\S]*?<\/script>)",
                                          QRegularExpression::CaseInsensitiveOption|QRegularExpression::MultilineOption );

  html.remove( linkTag );
  html.remove( scriptElement );

  return QTextDocumentFragment::fromHtml( html ).toPlainText().length();
}
// Slot invoked when the stemmed search started by bodyFinished() completes.
// Appends the "Close words" suggestions, if any. For a phrase made of more
// than one word the compound search is started next; otherwise the page is
// closed and the request finishes.
void ArticleRequest::stemmedSearchFinished()
{
  // Got stemmed matching results

  WordFinder::SearchResults sr = stemmedWordFinder->getResults();

  string footer;

  if ( sr.size() )
  {
    footer += R"(<div class="gdstemmedsuggestion"><span class="gdstemmedsuggestion_head">)" +
              Html::escape( tr( "Close words: " ).toUtf8().data() ) +
              "</span><span class=\"gdstemmedsuggestion_body\">";

    // Comma-separated list of links, one per suggestion.
    bool isFirst = true;
    for( auto const & suggestion : sr )
    {
      if ( !isFirst )
        footer += ", ";
      isFirst = false;

      footer += linkWord( suggestion.first );
    }

    footer += "</span></div>";
  }

  splittedWords = splitIntoWords( word );

  bool const hasSeveralWords = splittedWords.first.size() > 1; // Contains more than one word

  if ( hasSeveralWords )
  {
    // From now on the finder's results are handled by the compound search.
    disconnect( stemmedWordFinder.get(), &WordFinder::finished, this, &ArticleRequest::stemmedSearchFinished );

    connect( stemmedWordFinder.get(),
             &WordFinder::finished,
             this,
             &ArticleRequest::individualWordFinished,
             Qt::QueuedConnection );

    currentSplittedWordStart = -1;
    currentSplittedWordEnd = currentSplittedWordStart;

    firstCompoundWasFound = false;

    compoundSearchNextStep( false );
  }
  else
  {
    footer += "</body></html>";
  }

  {
    Mutex::Lock _( dataMutex );

    size_t const oldSize = data.size();

    data.resize( oldSize + footer.size() );

    memcpy( &data.front() + oldSize, footer.data(), footer.size() );
  }

  if ( hasSeveralWords )
    update();
  else
    finish();
}
// Drives the compound expression search over the words of the phrase.
//
// lastSearchSucceeded - false: the previous lookup failed (or this is the
//                       very first step); the last good compound, if any, is
//                       emitted and the search restarts from the next word
//                       with a two-word sequence, or - when no start word is
//                       left - the individual words are emitted and the
//                       request finishes.
//                       true: the previous lookup succeeded; the sequence is
//                       extended by one word when possible.
void ArticleRequest::compoundSearchNextStep( bool lastSearchSucceeded )
{
  if ( !lastSearchSucceeded )
  {
    // Last search was unsuccessful. First, emit what we had.

    string footer;

    if ( lastGoodCompoundResult.size() ) // We have something to append
    {
      // GD_DPRINTF( "Appending\n" );

      if ( !firstCompoundWasFound )
      {
        // Append the beginning
        footer += R"(<div class="gdstemmedsuggestion"><span class="gdstemmedsuggestion_head">)" +
                  Html::escape( tr( "Compound expressions: " ).toUtf8().data() ) +
                  "</span><span class=\"gdstemmedsuggestion_body\">";

        firstCompoundWasFound = true;
      }
      else
      {
        // Append the separator
        footer += " / ";
      }

      footer += linkWord( lastGoodCompoundResult );

      lastGoodCompoundResult.clear();
    }

    // Then, start a new search for the next word, if possible

    if ( currentSplittedWordStart >= splittedWords.first.size() - 2 )
    {
      // The last word was the last possible to start from

      // Close both the body span and the enclosing div of the compound
      // expressions section, the same way the "Close words" section is closed.
      if ( firstCompoundWasFound )
        footer += "</span></div>";

      // Now add links to all the individual words. They conclude the result.

      footer += R"(<div class="gdstemmedsuggestion"><span class="gdstemmedsuggestion_head">)" +
                Html::escape( tr( "Individual words: " ).toUtf8().data() ) +
                "</span><span class=\"gdstemmedsuggestion_body\"";
      if( splittedWords.first[ 0 ].isRightToLeft() )
        footer += " dir=\"rtl\"";
      footer += ">";

      // Words are interleaved with the original spacing around them.
      footer += escapeSpacing( splittedWords.second[ 0 ] );

      for( int x = 0; x < splittedWords.first.size(); ++x )
      {
        footer += linkWord( splittedWords.first[ x ] );
        footer += escapeSpacing( splittedWords.second[ x + 1 ] );
      }

      // Close the body span and the section div before ending the page.
      footer += "</span></div>";

      footer += "</body></html>";

      appendToData( footer );

      finish();

      return;
    }

    if ( footer.size() )
    {
      appendToData( footer );
      update();
    }

    // Advance to the next word and start from looking up two words
    ++currentSplittedWordStart;
    currentSplittedWordEnd = currentSplittedWordStart + 1;
  }
  else
  {
    // Last lookup succeeded -- see if we can try the larger sequence

    if ( currentSplittedWordEnd < splittedWords.first.size() - 1 )
    {
      // We can, indeed.
      ++currentSplittedWordEnd;
    }
    else
    {
      // We can't. Emit what we have and start over.

      ++currentSplittedWordEnd; // So we could use the same code for result
                                // emitting

      // Initiate new lookup
      compoundSearchNextStep( false );

      return;
    }
  }

  // Build the compound sequence

  currentSplittedWordCompound = makeSplittedWordCompound();

  // Look it up

  // GD_DPRINTF( "Looking up %s\n", qPrintable( currentSplittedWordCompound ) );

  stemmedWordFinder->expressionMatch( currentSplittedWordCompound, activeDicts, 40, // Would one be enough? Leave 40 to be safe.
                                      Dictionary::SuitableForCompoundSearching );
}
// Joins the words from currentSplittedWordStart to currentSplittedWordEnd
// (inclusive) into one expression. The spacing between two neighbouring
// words is taken from the original phrase with its whitespace normalized.
QString ArticleRequest::makeSplittedWordCompound()
{
  QString compound;

  for( int idx = currentSplittedWordStart; idx <= currentSplittedWordEnd; ++idx )
  {
    compound += splittedWords.first[ idx ];

    // No spacing goes after the last word of the sequence.
    if ( idx >= currentSplittedWordEnd )
      continue;

    wstring separator( gd::toWString( splittedWords.second[ idx + 1 ] ) );
    Folding::normalizeWhitespace( separator );
    compound += gd::toQString( separator );
  }

  return compound;
}
void ArticleRequest::individualWordFinished()
{
WordFinder::SearchResults const & results = stemmedWordFinder->getResults();
if ( results.size() )
{
wstring source = Folding::applySimpleCaseOnly( gd::toWString( currentSplittedWordCompound ) );
bool hadSomething = false;
for( unsigned x = 0; x < results.size(); ++x )
{
if ( results[ x ].second )
{
// Spelling suggestion match found. No need to continue.
hadSomething = true;
lastGoodCompoundResult = currentSplittedWordCompound;
break;
}
// Prefix match found. Check if the aliases are acceptable.
wstring result( Folding::applySimpleCaseOnly( gd::toWString( results[ x ].first ) ) );
if ( source.size() <= result.size() && result.compare( 0, source.size(), source ) == 0 )
{
// The resulting string begins with the source one
hadSomething = true;
if ( source.size() == result.size() )
{
// Got the match. No need to continue.
lastGoodCompoundResult = currentSplittedWordCompound;
break;
}
}
}
if ( hadSomething )
{
compoundSearchNextStep( true );
return;
}
}
compoundSearchNextStep( false );
}
// Appends the bytes of str to the end of the request's data buffer while
// holding dataMutex.
void ArticleRequest::appendToData( std::string const & str )
{
  Mutex::Lock _( dataMutex );

  data.insert( data.end(), str.begin(), str.end() );
}
// Splits the input into words and the runs of punctuation/whitespace around
// them. result.second[ i ] is the spacing preceding word i, and one more
// (possibly empty) spacing entry follows the last word, so the spacing list
// is always one longer than the word list. Scanning stops at the first NUL
// character.
QPair< ArticleRequest::Words, ArticleRequest::Spacings > ArticleRequest::splitIntoWords( QString const & input )
{
  QPair< Words, Spacings > result;

  auto const isSeparator = []( QChar const & ch )
  {
    return Folding::isPunct( ch.unicode() ) || Folding::isWhitespace( ch.unicode() );
  };

  QChar const * cursor = input.data();

  while( true )
  {
    // Collect the separators in front of the next word.
    QString spacing;
    while( cursor->unicode() && isSeparator( *cursor ) )
    {
      spacing.append( *cursor );
      ++cursor;
    }

    result.second.append( spacing );

    // Collect the word itself.
    QString word;
    while( cursor->unicode() && !isSeparator( *cursor ) )
    {
      word.append( *cursor );
      ++cursor;
    }

    // An empty word means the end of the input has been reached.
    if ( word.isEmpty() )
      break;

    result.first.append( word );
  }

  return result;
}
// Returns an <a> element whose target is the gdlookup://localhost/ URL of
// the given word and whose text is the HTML-escaped word.
string ArticleRequest::linkWord( QString const & str )
{
  QUrl target;
  target.setScheme( "gdlookup" );
  target.setHost( "localhost" );
  target.setPath( Utils::Url::ensureLeadingSlash( str ) );

  string const caption = Html::escape( str.toUtf8().data() );

  string anchor( "<a href=\"" );
  anchor += target.toEncoded().data();
  anchor += "\">";
  anchor += caption;
  anchor += "</a>";
  return anchor;
}
// HTML-escapes a run of spacing characters and turns every line feed in it
// into a <br> element.
std::string ArticleRequest::escapeSpacing( QString const & str )
{
  QByteArray escaped( Html::escape( str.toUtf8().data() ).c_str() );

  escaped.replace( "\n", "<br>" );

  return std::string( escaped.constData() );
}
// Cancels every pending synonym search, every pending body request and the
// stemmed word finder (when there is one), then marks this request finished.
// Does nothing when the request has already finished.
void ArticleRequest::cancel()
{
  if( isFinished() )
    return;

  for( auto const & search : altSearches )
    search->cancel();

  for( auto const & body : bodyRequests )
    body->cancel();

  if( stemmedWordFinder.get() )
    stemmedWordFinder->cancel();

  finish();
}