goldendict-ng/src/article_netmgr.cc

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

475 lines
14 KiB
C++
Raw Normal View History

2021-08-14 07:25:10 +00:00
/* This file is (c) 2008-2012 Konstantin Isakov <ikm@goldendict.org>
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#include <stdint.h>
#include <QUrl>
#include "article_netmgr.hh"
#include "gddebug.hh"
#include "utils.hh"
2021-08-05 06:57:22 +00:00
#include <QNetworkAccessManager>
#include "globalbroadcaster.hh"
using std::string;
// AllowFrameReply
/// Wraps @p _reply in a read-only QNetworkReply.
///
/// The operation, request and URL are copied from the wrapped reply, and the
/// wrapped reply's signals are relayed through this object.
///
/// @param _reply reply to wrap; it is dereferenced immediately, so it must not
///               be null.
AllowFrameReply::AllowFrameReply( QNetworkReply * _reply ):
  baseReply( _reply )
{
  // Set base data
  setOperation( baseReply->operation() );
  setRequest( baseReply->request() );
  setUrl( baseReply->url() );

  // Signals to own slots
  connect( baseReply, &QNetworkReply::metaDataChanged, this, &AllowFrameReply::applyMetaData );
  connect( baseReply, &QNetworkReply::errorOccurred, this, &AllowFrameReply::applyError );
  // Completion goes through finishedSlot() so that setFinished( true ) is
  // applied to this wrapper before finished() is re-emitted; forwarding the
  // signal directly would leave isFinished() false forever.
  connect( baseReply, &QNetworkReply::finished, this, &AllowFrameReply::finishedSlot );
  connect( baseReply, &QIODevice::readyRead, this, &QIODevice::readyRead );

  // Redirect QNetworkReply signals
  connect( baseReply, &QNetworkReply::downloadProgress, this, &QNetworkReply::downloadProgress );
  connect( baseReply, &QNetworkReply::encrypted, this, &QNetworkReply::encrypted );
  connect( baseReply,
           &QNetworkReply::preSharedKeyAuthenticationRequired,
           this,
           &QNetworkReply::preSharedKeyAuthenticationRequired );
  connect( baseReply, &QNetworkReply::redirected, this, &QNetworkReply::redirected );
  connect( baseReply, &QNetworkReply::sslErrors, this, &QNetworkReply::sslErrors );
  connect( baseReply, &QNetworkReply::uploadProgress, this, &QNetworkReply::uploadProgress );

  // Redirect QIODevice signals
  connect( baseReply, &QIODevice::aboutToClose, this, &QIODevice::aboutToClose );
  connect( baseReply, &QIODevice::bytesWritten, this, &QIODevice::bytesWritten );
  connect( baseReply, &QIODevice::readChannelFinished, this, &QIODevice::readChannelFinished );

  setOpenMode( QIODevice::ReadOnly );
}
// Slot connected to the wrapped reply's metaDataChanged() signal.
// Currently does nothing.
void AllowFrameReply::applyMetaData()
{
// Intentionally empty: QtWebEngine offers no way to customize the response
// headers right now; an API for this may become available around Qt 6.7.
}
// Applies the read buffer size to this reply and to the wrapped reply.
void AllowFrameReply::setReadBufferSize( qint64 size )
{
QNetworkReply::setReadBufferSize( size );
baseReply->setReadBufferSize( size );
}
/// Returns the number of bytes that can be read right now.
///
/// This is the data still pending in the wrapped reply plus whatever this
/// device already holds in its own QIODevice buffer. Qt requires
/// reimplementations of bytesAvailable() to include the base-class value.
qint64 AllowFrameReply::bytesAvailable() const
{
  return QNetworkReply::bytesAvailable() + baseReply->bytesAvailable();
}
// Slot connected to the wrapped reply's errorOccurred() signal.
// Copies the error code and the wrapped reply's error string onto this reply,
// then re-emits errorOccurred() from this object.
void AllowFrameReply::applyError( QNetworkReply::NetworkError code )
{
setError( code, baseReply->errorString() );
emit errorOccurred( code );
}
/// Reads up to @p maxSize bytes from the wrapped reply into @p data.
///
/// @return the value returned by the wrapped reply's read(), so that short
///         reads and read errors reach the caller instead of being reported
///         as a full read of the requested size.
qint64 AllowFrameReply::readData( char * data, qint64 maxSize )
{
  qint64 const pending = baseReply->bytesAvailable();
  qint64 const size    = qMin( maxSize, pending );
  return baseReply->read( data, size );
}
2023-03-26 06:46:27 +00:00
// Marks this reply as finished and then emits finished().
void AllowFrameReply::finishedSlot()
{
setFinished( true );
emit finished();
}
/// Creates the reply object for a request coming from an article view.
///
/// Resolution order:
///  1. "qrcx" URLs are always blocked.
///  2. "gdlookup" URLs carrying the word in the path are rewritten to the
///     query form (word + current group id).
///  3. If getResource() yields a data request, it is wrapped in an
///     ArticleResourceReply.
///  4. Remaining non-external URLs are blocked.
///  5. If disallowContentFromOtherSites is set and a Referer header is present,
///     elements from a different site than the referer are blocked.
///  6. "file" URLs with an empty host may get their path adjusted and are then
///     served by QNetworkAccessManager directly.
///  7. Everything else is fetched over the network and wrapped in an
///     AllowFrameReply.
///
/// @param req the incoming request.
/// @return a newly allocated reply; never null.
QNetworkReply * ArticleNetworkAccessManager::getArticleReply( QNetworkRequest const & req )
{
  if ( req.url().scheme() == "qrcx" ) {
    // Do not support qrcx which is the custom define protocol.
    return new BlockedNetworkReply( this );
  }

  auto op = GetOperation;

  QUrl url            = req.url();
  QMimeType mimeType  = db.mimeTypeForUrl( url );
  QString contentType = mimeType.name();

  if ( req.url().scheme() == "gdlookup" ) {
    QString path = url.path();
    if ( path.size() > 1 ) {
      // Move the word from the path into the query string.
      url.setPath( "" );
      Utils::Url::addQueryItem( url, "word", path.mid( 1 ) );
      Utils::Url::addQueryItem( url, "group", QString::number( GlobalBroadcaster::instance()->currentGroupId ) );
    }
  }

  auto dr = getResource( url, contentType );
  if ( dr.get() ) {
    return new ArticleResourceReply( this, req, dr, contentType );
  }

  // dr can be null here: no dictionary resource matched, so the URL must be an
  // external link. Anything that is not external (e.g. bres://upload.wikimedia...)
  // is blocked right away.
  if ( !Utils::isExternalLink( url ) ) {
    gdWarning( R"(Blocking element "%s" as built-in link )", req.url().toEncoded().data() );
    return new BlockedNetworkReply( this );
  }

  // Check the Referer. If the user has opted-in to block elements from external
  // pages, we block them.
  if ( disallowContentFromOtherSites && req.hasRawHeader( "Referer" ) ) {
    QUrl const refererUrl = QUrl::fromEncoded( req.rawHeader( "Referer" ) );
    QString const host    = url.host();
    QString const refHost = refererUrl.host();

    // The host must equal the referer host or be a real sub-domain of it.
    // A bare suffix match would let "notexample.com" pass for "example.com".
    // An empty referer host keeps matching everything, as endsWith( "" ) did.
    bool const sameOrSubdomain =
      refHost.isEmpty() || host == refHost || host.endsWith( QStringLiteral( "." ) + refHost );

    if ( !sameOrSubdomain
         && Utils::Url::getHostBaseFromUrl( url ) != Utils::Url::getHostBaseFromUrl( refererUrl )
         && !url.scheme().startsWith( "data" ) ) {
      gdWarning( R"(Blocking element "%s" due to not same domain)", url.toEncoded().data() );
      return new BlockedNetworkReply( this );
    }
  }

  if ( req.url().scheme() == "file" ) {
    // Check file presence and adjust path if necessary
    QString fileName = req.url().toLocalFile();
    if ( req.url().host().isEmpty() && ArticleMaker::adjustFilePath( fileName ) ) {
      QUrl newUrl( req.url() );
      QUrl localUrl = QUrl::fromLocalFile( fileName );

      newUrl.setHost( localUrl.host() );
      newUrl.setPath( Utils::Url::ensureLeadingSlash( localUrl.path() ) );

      QNetworkRequest newReq( req );
      newReq.setUrl( newUrl );

      return QNetworkAccessManager::createRequest( op, newReq, nullptr );
    }
  }

  // Build a fresh request; only the (possibly spoofed) User-Agent is carried over.
  QNetworkRequest newReq;
  newReq.setUrl( url );
  newReq.setAttribute( QNetworkRequest::RedirectPolicyAttribute, QNetworkRequest::NoLessSafeRedirectPolicy );
  if ( hideGoldenDictHeader && url.scheme().startsWith( "http", Qt::CaseInsensitive ) ) {
    // spoof User-Agent
    newReq.setRawHeader( "User-Agent", req.rawHeader( "User-Agent" ).replace( qApp->applicationName().toUtf8(), "" ) );
  }

  QNetworkReply * reply = QNetworkAccessManager::createRequest( op, newReq, nullptr );

  if ( url.scheme() == "https" ) {
#ifndef QT_NO_SSL
    // NOTE(review): every SSL error is ignored for https content. This keeps
    // the existing behaviour but weakens transport security.
    connect( reply, &QNetworkReply::sslErrors, reply, [ reply ]() {
      reply->ignoreSslErrors();
    } );
#endif
  }

  return new AllowFrameReply( reply );
}
/// Returns the HTML produced by the article maker for one of the built-in
/// pages. Any resource type other than UNTITLE, WELCOME or BLANK yields an
/// empty string.
string ArticleNetworkAccessManager::getHtml( ResourceType resourceType )
{
  if ( resourceType == ResourceType::UNTITLE ) {
    return articleMaker.makeUntitleHtml();
  }
  if ( resourceType == ResourceType::WELCOME ) {
    return articleMaker.makeWelcomeHtml();
  }
  if ( resourceType == ResourceType::BLANK ) {
    return articleMaker.makeBlankHtml();
  }
  return {};
}
/// Resolves an internal URL to a dictionary data request.
///
/// - "gdlookup": article lookups. Sets @p contentType to "text/html". Requests
///   with a host other than empty/"localhost" get an instant request
///   constructed with `false`.
/// - "bres", "gdau", "gdvideo", "gico" with a non-empty path: resources of the
///   dictionary whose id equals the URL host. @p contentType is set from the
///   MIME database. For "gico" the dictionary icon is rendered as a 64px PNG.
///
/// @param url          URL to resolve.
/// @param contentType  in/out; overwritten for the schemes listed above.
/// @return the data request, or a null pointer when nothing matched or the
///         dictionary threw while looking up the resource.
sptr< Dictionary::DataRequest > ArticleNetworkAccessManager::getResource( QUrl const & url, QString & contentType )
{
  qDebug() << "getResource:" << url.toString();
  qDebug() << "scheme:" << url.scheme();
  qDebug() << "host:" << url.host();

  if ( url.scheme() == "gdlookup" ) {
    if ( !url.host().isEmpty() && url.host() != "localhost" ) {
      // Strange request - ignore it
      return std::make_shared< Dictionary::DataRequestInstant >( false );
    }

    contentType = "text/html";

    if ( Utils::Url::queryItemValue( url, "blank" ) == "1" ) {
      return articleMaker.makeEmptyPage();
    }

    QString word = Utils::Url::queryItemValue( url, "word" ).trimmed();

    bool groupIsValid = false;
    unsigned group    = Utils::Url::queryItemValue( url, "group" ).toUInt( &groupIsValid );

    QString dictIDs = Utils::Url::queryItemValue( url, "dictionaries" );
    if ( !dictIDs.isEmpty() ) {
      // Individual dictionaries set from full-text search
      QStringList dictIDList = dictIDs.split( "," );
      return articleMaker.makeDefinitionFor( word, group, QMap< QString, QString >(), QSet< QString >(), dictIDList );
    }

    // See if we have some dictionaries muted
    QStringList mutedDictLists = Utils::Url::queryItemValue( url, "muted" ).split( ',' );
    QSet< QString > mutedDicts( mutedDictLists.begin(), mutedDictLists.end() );

    // Unpack contexts
    QString const contextsEncoded           = Utils::Url::queryItemValue( url, "contexts" );
    QMap< QString, QString > const contexts = Utils::str2map( contextsEncoded );

    // See for ignore diacritics
    bool ignoreDiacritics = Utils::Url::queryItemValue( url, "ignore_diacritics" ) == "1";

    if ( groupIsValid && !word.isEmpty() ) { // Require group and phrase to be passed
      return articleMaker.makeDefinitionFor( word, group, contexts, mutedDicts, QStringList(), ignoreDiacritics );
    }
  }

  if ( ( url.scheme() == "bres" || url.scheme() == "gdau" || url.scheme() == "gdvideo" || url.scheme() == "gico" )
       && url.path().size() ) {

    QMimeType mimeType = db.mimeTypeForUrl( url );
    contentType        = mimeType.name();

    string id = url.host().toStdString();

    // The "search" pseudo-host is not resolved here.
    if ( id != "search" ) {
      for ( const auto & dictionary : dictionaries ) {
        if ( dictionary->getId() != id ) {
          continue;
        }

        if ( url.scheme() == "gico" ) {
          QByteArray bytes;
          QBuffer buffer( &bytes );
          buffer.open( QIODevice::WriteOnly );
          dictionary->getIcon().pixmap( 64 ).save( &buffer, "PNG" );
          buffer.close();

          sptr< Dictionary::DataRequestInstant > ico = std::make_shared< Dictionary::DataRequestInstant >( true );
          ico->getData().resize( bytes.size() );
          // front() on an empty buffer is undefined behaviour, so only copy
          // when the icon actually produced data.
          if ( !bytes.isEmpty() ) {
            memcpy( &( ico->getData().front() ), bytes.data(), bytes.size() );
          }
          return ico;
        }

        try {
          return dictionary->getResource( Utils::Url::path( url ).mid( 1 ).toUtf8().data() );
        }
        catch ( std::exception const & e ) {
          gdWarning( "getResource request error (%s) in \"%s\"\n", e.what(), dictionary->getName().c_str() );
          return {};
        }
      }
    }
  }

  return {};
}
/// Builds a read-only reply that serves the data of a dictionary request.
///
/// @param parent       QObject parent.
/// @param netReq       originating network request; its URL becomes the reply URL.
/// @param req_         dictionary data request backing this reply.
/// @param contentType  value for the Content-Type header; skipped when empty.
ArticleResourceReply::ArticleResourceReply( QObject * parent,
                                            QNetworkRequest const & netReq,
                                            sptr< Dictionary::DataRequest > const & req_,
                                            QString const & contentType ):
  QNetworkReply( parent ),
  req( req_ ),
  alreadyRead( 0 )
{
  setRequest( netReq );
  setOpenMode( ReadOnly );
  setUrl( netReq.url() );

  if ( !contentType.isEmpty() ) {
    setHeader( QNetworkRequest::ContentTypeHeader, contentType );
  }

  // Follow the progress of the underlying request.
  connect( req.get(), &Dictionary::Request::updated, this, &ArticleResourceReply::reqUpdated );
  connect( req.get(), &Dictionary::Request::finished, this, &ArticleResourceReply::reqFinished );

  // Nothing to announce yet: the request is still running and has no data.
  if ( !req->isFinished() && req->dataSize() <= 0 ) {
    return;
  }

  // Data is already there (or the request is done). The notifications go
  // through queued connections, so they are delivered after this constructor
  // has returned.
  connect( this,
           &ArticleResourceReply::readyReadSignal,
           this,
           &ArticleResourceReply::readyReadSlot,
           Qt::QueuedConnection );
  connect( this,
           &ArticleResourceReply::finishedSignal,
           this,
           &ArticleResourceReply::finishedSlot,
           Qt::QueuedConnection );

  emit readyReadSignal();

  if ( req->isFinished() ) {
    emit finishedSignal();
    GD_DPRINTF( "In-place finish.\n" );
  }
}
// Cancels the underlying dictionary request when the reply is destroyed.
ArticleResourceReply::~ArticleResourceReply()
{
req->cancel();
}
// Slot for the request's updated() signal: emits readyRead().
void ArticleResourceReply::reqUpdated()
{
emit readyRead();
}
// Slot for the request's finished() signal: emits readyRead() and then
// runs the completion handling in finishedSlot().
void ArticleResourceReply::reqFinished()
{
emit readyRead();
finishedSlot();
}
/// Number of bytes that can currently be read.
///
/// Returns 0 while the request reports a negative data size. When there is
/// nothing unread but the request is still running, a fixed 10240 is reported
/// instead of 0 (presumably so that readers keep waiting for more data —
/// TODO confirm).
qint64 ArticleResourceReply::bytesAvailable() const
{
  qint64 const total = req->dataSize();
  if ( total < 0 ) {
    return 0;
  }

  qint64 const unread         = total - alreadyRead + QNetworkReply::bytesAvailable();
  bool const emptyButPending = ( unread == 0 ) && !req->isFinished();
  return emptyButPending ? 10240 : unread;
}
/// True once the request has finished and bytesAvailable() reports 0.
bool ArticleResourceReply::atEnd() const
{
  if ( !req->isFinished() ) {
    return false;
  }
  return bytesAvailable() == 0;
}
/// Copies the next slice of the request's data into @p out.
///
/// @param out      destination buffer.
/// @param maxSize  maximum number of bytes to copy.
/// @return the number of bytes copied; 0 when nothing is available yet;
///         -1 when the request has finished and there is nothing left (or its
///         data size is negative), or when fetching the slice failed.
qint64 ArticleResourceReply::readData( char * out, qint64 maxSize )
{
  // From the doc: "This function might be called with a maxSize of 0,
  // which can be used to perform post-reading operations".
  if ( maxSize == 0 ) {
    return 0;
  }

  bool const finished = req->isFinished();
  qint64 const avail  = req->dataSize();

  if ( avail < 0 ) {
    return finished ? -1 : 0;
  }

  qint64 const left   = avail - alreadyRead;
  qint64 const toRead = qMin( maxSize, left );

  if ( !toRead && finished ) {
    return -1;
  }

  GD_DPRINTF( "====reading %d of (%lld) bytes . Finished: %d", static_cast< int >( toRead ), avail, finished );

  try {
    req->getDataSlice( alreadyRead, toRead, out );
  }
  catch ( std::exception const & e ) {
    qWarning( "getDataSlice error: %s", e.what() );
    // Report the failure: do not advance the position or claim that bytes
    // which were never written to the buffer have been read.
    return -1;
  }

  alreadyRead += toRead;
  return toRead;
}
// Slot for the internal readyReadSignal(): emits readyRead().
void ArticleResourceReply::readyReadSlot()
{
emit readyRead();
}
void ArticleResourceReply::finishedSlot()
{
2022-01-08 04:22:41 +00:00
if ( req->dataSize() < 0 ) {
emit errorOccurred( ContentNotFoundError );
2022-01-08 04:22:41 +00:00
setError( ContentNotFoundError, "content not found" );
}
//prevent sent multi times.
if ( !finishSignalSent.loadAcquire() ) {
finishSignalSent.ref();
2023-03-26 06:46:27 +00:00
setFinished( true );
emit finished();
}
}
// Creates a reply that is already in the ContentOperationNotPermittedError
// state ("Content Blocked"). finishedSlot() is invoked through a queued
// connection rather than directly from the constructor.
BlockedNetworkReply::BlockedNetworkReply( QObject * parent ):
QNetworkReply( parent )
{
setError( QNetworkReply::ContentOperationNotPermittedError, "Content Blocked" );
connect( this, &BlockedNetworkReply::finishedSignal, this, &BlockedNetworkReply::finishedSlot, Qt::QueuedConnection );
emit finishedSignal(); // This way we call readyRead()/finished() sometime later
}
void BlockedNetworkReply::finishedSlot()
{
emit readyRead();
2023-03-26 06:46:27 +00:00
setFinished( true );
emit finished();
}
2021-08-05 06:57:22 +00:00
/// Creates a scheme handler that answers requests through @p articleNetMgr.
LocalSchemeHandler::LocalSchemeHandler( ArticleNetworkAccessManager & articleNetMgr, QObject * parent ):
  QWebEngineUrlSchemeHandler( parent ),
  mManager( articleNetMgr )
{
  // Nothing else to set up.
}
2021-10-03 11:28:26 +00:00
2021-10-02 12:48:49 +00:00
/// Answers a web engine request with the reply produced by the article
/// network manager, always declaring the content as "text/html".
///
/// When the scheme is recognised but the query word is empty, the function
/// returns without replying to or failing the job.
void LocalSchemeHandler::requestStarted( QWebEngineUrlRequestJob * requestJob )
{
  QUrl const requestUrl = requestJob->requestUrl();

  // all the url reached here must be either gdlookup or bword scheme.
  auto [ schemeOk, queryWord ] = Utils::Url::getQueryWord( requestUrl );

  // Only the "valid scheme, empty word" case is rejected here.
  if ( schemeOk && queryWord.isEmpty() ) {
    // invalid gdlookup url.
    return;
  }

  QNetworkRequest netRequest;
  netRequest.setUrl( requestUrl );

  QNetworkReply * articleReply = this->mManager.getArticleReply( netRequest );
  requestJob->reply( "text/html", articleReply );

  // Delete the reply once the job is destroyed.
  connect( requestJob, &QObject::destroyed, articleReply, &QObject::deleteLater );
}