/* This file is (c) 2008-2012 Konstantin Isakov
 * Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */

// ArticleMaker builds the HTML pages handed to the article view (page header,
// "not found" pages, built-in help pages), and ArticleRequest streams the
// articles of every active dictionary into one page, followed by stemmed /
// compound / individual-word suggestions when nothing was found.
//
// NOTE(review): this copy of the file looks like it went through a markup
// stripper. Many string literals that presumably carried HTML are empty or
// cut short, four #include targets are missing, and part of
// compoundSearchNextStep() is gone. Those spots are reproduced exactly as
// found and marked NOTE(review); restore them from version control before
// building. Only the line structure and the pieces that can be derived from
// the surrounding code were repaired here.

#include "article_maker.hh"
#include "config.hh"
#include "htmlescape.hh"
#include "utf8.hh"
#include "wstring_qt.hh"
// NOTE(review): the targets of the next four includes are missing in this
// copy. The code below uses QFile, QDir, QFileInfo, QUrl,
// QTextDocumentFragment and QRegularExpression - confirm which headers
// belong here.
#include
#include
#include
#include
#include "folding.hh"
#include "langcoder.hh"
#include "gddebug.hh"
#include "utils.hh"
#include "globalbroadcaster.h"
#include "base/globalregex.hh"

using std::vector;
using std::string;
using gd::wstring;
using std::set;
using std::list;

/// Returns the "enabled" flag of the AnkiConnect server preference.
inline bool ankiConnectEnabled()
{
  return GlobalBroadcaster::instance()->getPreference()->ankiConnectServer.enabled;
}

/// Stores references to the dictionary list, the group list and the
/// preferences; nothing is copied.
ArticleMaker::ArticleMaker( vector< sptr< Dictionary::Class > > const & dictionaries_,
                            vector< Instances::Group > const & groups_,
                            const Config::Preferences & cfg_ ):
  dictionaries( dictionaries_ ),
  groups( groups_ ),
  cfg( cfg_ )
{
}

/// Assembles the page header: scripts, stylesheets (built-in, user and
/// add-on, for screen and print), the escaped title and the icon link.
///
/// @param word                 used (HTML-escaped) in the header.
/// @param icon                 when non-empty, an icon entry is appended.
/// @param expandOptionalParts  when true, an extra fragment is appended
///                             after the stylesheets.
/// @return the header as a UTF-8 std::string.
///
/// NOTE(review): almost every literal in this function has lost its markup
/// in this copy; see the file-level note.
std::string ArticleMaker::makeHtmlHeader( QString const & word,
                                          QString const & icon,
                                          bool expandOptionalParts ) const
{
  string result = "" "" "";

  // add jquery
  {
    result += "";

    result += "";

    //custom javascript
    result += R"()";

    //iframe resizer javascript
    result += R"()";
  }

  // add qwebchannel
  {
    result += R"()";
  }

  // document ready ,init webchannel
  {
    result += R"( )";
  }

  // Add a css stylesheet
  {
    result += R"()";

    if ( cfg.displayStyle.size() )
    {
      // Load an additional stylesheet
      // NOTE(review): displayStyleCssFile is computed but the literal that
      // presumably referenced it is truncated in this copy.
      QString displayStyleCssFile = QString("qrc:///article-style-st-%1.css").arg(cfg.displayStyle);
      result += ")";
    }

    result += readCssFile(Config::getUserCssFileName() ,"all");

    if( !cfg.addonStyle.isEmpty() )
    {
      QString name = Config::getStylesDir() + cfg.addonStyle + QDir::separator() + "article-style.css";
      result += readCssFile(name ,"all");
    }

    // Turn on/off expanding of article optional parts
    if( expandOptionalParts )
    {
      result += R"( )";
    }
  }

  // Add print-only css
  {
    result += R"()";

    result += readCssFile(Config::getUserCssPrintFileName() ,"print");

    if( !cfg.addonStyle.isEmpty() )
    {
      QString name = Config::getStylesDir() + cfg.addonStyle + QDir::separator() + "article-style-print.css";
      result += readCssFile(name ,"print");
    }
  }

  result += "" + Html::escape( word.toStdString()) + "";

  // This doesn't seem to be much of influence right now, but we'll keep
  // it anyway.
  // NOTE(review): the raw string below is cut short in this copy (its
  // closing delimiter and the use of `icon` are missing).
  if ( icon.size() )
    result += R"(\n";

  result += QString::fromUtf8( R"( )" ).arg( tr( "Expand article" ), tr( "Collapse article" ) ) .toStdString();

  result+= R"()";

  if( GlobalBroadcaster::instance()->getPreference()->darkReaderMode )
  {
    // #242525 because Darkreader will invert pure white to this value
    result += R"( )";
  }

  result += "";

  return result;
}

/// Reads a CSS file and returns a fragment for the page header; returns an
/// empty string when the file cannot be opened or is empty.
///
/// NOTE(review): the literals are truncated in this copy, so neither the
/// file contents nor `media` are visibly used; confirm against the real
/// source.
std::string ArticleMaker::readCssFile(QString const & fileName, std::string media) const{
  QFile addonCss(fileName);
  std::string result;
  if (addonCss.open(QFile::ReadOnly)) {
    QByteArray css = addonCss.readAll();
    if (!css.isEmpty()) {
      result += "\n";
      result += R"(\n";
    }
  }
  return result;
}

/// Builds the "no translation found" message.
///
/// @param word   the looked-up word; when empty, the shorter message that
///               names only the group is used.
/// @param group  group name shown in the message.
/// Both values are HTML-escaped before being substituted.
std::string ArticleMaker::makeNotFoundBody( QString const & word,
                                            QString const & group )
{
  string result( "

" );

  QString str( word );
  if( str.isRightToLeft() )
  {
    // U+202E is RLO (Right-to-Left Override). The previous comment called it
    // RLE, which is U+202B; the code point itself is left unchanged.
    str.insert( 0, (ushort)0x202E );
    str.append( (ushort)0x202C ); // PDF, POP DIRECTIONAL FORMATTING
  }

  if ( word.size() )
    result += tr( "No translation for %1 was found in group %2." ).
              arg( QString::fromUtf8( Html::escape( str.toUtf8().data() ).c_str() ),
                   QString::fromUtf8( Html::escape( group.toUtf8().data() ).c_str() ) ).
              toUtf8().data();
  else
    result += tr( "No translation was found in group %1." ).
              arg( QString::fromUtf8( Html::escape( group.toUtf8().data() ).c_str() ) ).
              toUtf8().data();

  result += "

";

  return result;
}

/// Creates the request that produces the page for `phrase`.
///
/// - When `dictIDs` is non-empty, only the dictionaries with those ids are
///   queried, with no size limit and optional parts expanded.
/// - When `groupId` is the help group, one of the built-in pages is returned
///   (or a "not found" page for an unknown title).
/// - Otherwise the dictionaries of the matching group are used, or all
///   dictionaries when no group has that id, minus those listed in
///   `mutedDicts`.
///
/// @param contexts          per-dictionary context strings, keyed by
///                          dictionary id (passed through to ArticleRequest).
/// @param ignoreDiacritics  passed through to ArticleRequest.
sptr< Dictionary::DataRequest > ArticleMaker::makeDefinitionFor(
  Config::InputPhrase const & phrase, unsigned groupId,
  QMap< QString, QString > const & contexts,
  QSet< QString > const & mutedDicts,
  QStringList const & dictIDs , bool ignoreDiacritics ) const
{
  if( !dictIDs.isEmpty() )
  {
    QStringList ids = dictIDs;
    std::vector< sptr< Dictionary::Class > > ftsDicts;

    // Find dictionaries by ID's. Each requested id is consumed by the first
    // dictionary that matches it, so the scan can stop once all are found.
    for( auto const & dict : dictionaries )
    {
      int const idx = ids.indexOf( QString::fromStdString( dict->getId() ) );
      if( idx >= 0 )
      {
        ftsDicts.push_back( dict );
        ids.removeAt( idx );
      }
      if( ids.isEmpty() )
        break;
    }

    string header = makeHtmlHeader( phrase.phrase, QString(), true );

    // Template argument restored: the argument list matches the
    // ArticleRequest constructor defined in this file.
    return std::make_shared< ArticleRequest >( phrase, "", contexts, ftsDicts, header, -1, true );
  }

  if ( groupId == Instances::Group::HelpGroupId )
  {
    // This is a special group containing internal welcome/help pages
    string result = makeHtmlHeader( phrase.phrase, QString(), cfg.alwaysExpandOptionalParts);

    if ( phrase.phrase == tr( "Welcome!" ) )
    {
      result += tr( "

Welcome to GoldenDict!

"
                    "

To start working with the program, first visit Edit|Dictionaries to add some directory paths where to search "
                    "for the dictionary files, set up various Wikipedia sites or other sources, adjust dictionary order or create dictionary groups."
                    "

And then you're ready to look up your words! You can do that in this window "
                    "by using a pane to the left, or you can look up words from other active applications. "
                    "

To customize program, check out the available preferences at Edit|Preferences. "
                    "All settings there have tooltips, be sure to read them if you are in doubt about anything."
                    "

Should you need further help, have any questions, "
                    "suggestions or just wonder what the others think, you are welcome at the program's forum."
                    "

Check program's website for the updates. "
                    "

(c) 2008-2013 Konstantin Isakov. Licensed under GPLv3 or later."
                    ).toUtf8().data();
    }
    else
    if ( phrase.phrase == tr( "Working with popup" ) )
    {
      result += ( tr( "

Working with the popup

"
                      "To look up words from other active applications, you would need to first activate the \"Scan popup functionality\" in Preferences, "
                      "and then enable it at any time either by triggering the 'Popup' icon above, or "
                      "by clicking the tray icon down below with your right mouse button and choosing so in the menu you've popped. " ) +

#ifdef Q_OS_WIN32
                  tr( "Then just stop the cursor over the word you want to look up in another application, "
                      "and a window would pop up which would describe it to you." )
#else
                  tr( "Then just select any word you want to look up in another application by your mouse "
                      "(double-click it or swipe it with mouse with the button pressed), "
                      "and a window would pop up which would describe the word to you." )
#endif
                  ).toUtf8().data();
    }
    else
    {
      // Not found
      return makeNotFoundTextFor( phrase.phrase, "help" );
    }

    result += "";

    // Template argument restored from the declared type of `r`.
    sptr< Dictionary::DataRequestInstant > r = std::make_shared< Dictionary::DataRequestInstant >( true );

    r->appendDataSlice( result.data(), result.size() );
    return r;
  }

  // Find the given group
  Instances::Group const * activeGroup = nullptr;

  for( auto const & candidate : groups )
    if ( candidate.id == groupId )
    {
      activeGroup = &candidate;
      break;
    }

  // If we've found a group, use its dictionaries; otherwise, use the global
  // heap.
  std::vector< sptr< Dictionary::Class > > const & activeDicts =
    activeGroup ? activeGroup->dictionaries : dictionaries;

  string header = makeHtmlHeader( phrase.phrase,
                                  activeGroup && activeGroup->icon.size() ?
                                    activeGroup->icon : QString(),
                                  cfg.alwaysExpandOptionalParts );

  if ( mutedDicts.size() )
  {
    std::vector< sptr< Dictionary::Class > > unmutedDicts;

    unmutedDicts.reserve( activeDicts.size() );

    for( auto const & dict : activeDicts )
      if ( !mutedDicts.contains( QString::fromStdString( dict->getId() ) ) )
        unmutedDicts.push_back( dict );

    return std::make_shared< ArticleRequest >(
      phrase, activeGroup ? activeGroup->name : "", contexts, unmutedDicts, header,
      cfg.collapseBigArticles ? cfg.articleSizeLimit : -1,
      cfg.alwaysExpandOptionalParts, ignoreDiacritics );
  }
  else
    return std::make_shared< ArticleRequest >(
      phrase, activeGroup ? activeGroup->name : "", contexts, activeDicts, header,
      cfg.collapseBigArticles ? cfg.articleSizeLimit : -1,
      cfg.alwaysExpandOptionalParts, ignoreDiacritics );
}

/// Returns an already-finished request holding a complete "not found" page
/// (header + not-found body + closing fragment) for `word` in `group`.
sptr< Dictionary::DataRequest > ArticleMaker::makeNotFoundTextFor(
  QString const & word, QString const & group ) const
{
  string result = makeHtmlHeader( word, QString(), true ) +
    makeNotFoundBody( word, group ) +
    "";

  // Template argument restored from the declared type of `r`.
  sptr< Dictionary::DataRequestInstant > r = std::make_shared< Dictionary::DataRequestInstant >( true );

  r->appendDataSlice( result.data(), result.size() );
  return r;
}

/// Returns an already-finished request holding a page that consists of the
/// header (titled "(untitled)") and the closing fragment only.
sptr< Dictionary::DataRequest > ArticleMaker::makeEmptyPage() const
{
  string result = makeHtmlHeader( tr( "(untitled)" ), QString(), true ) +
    "";

  // Template argument restored from the declared type of `r`.
  sptr< Dictionary::DataRequestInstant > r = std::make_shared< Dictionary::DataRequestInstant >( true );

  r->appendDataSlice( result.data(), result.size() );
  return r;
}

/// Returns an already-finished request holding a page titled "(picture)".
///
/// NOTE(review): the body literals are truncated in this copy and `url` is
/// not visibly used; confirm against the real source.
sptr< Dictionary::DataRequest > ArticleMaker::makePicturePage( string const & url ) const
{
  string result = makeHtmlHeader( tr( "(picture)" ), QString(), true )
                  + "2) history.go(-1)\">"
                  + ""
                  + "";

  // Template argument restored from the declared type of `r`.
  sptr< Dictionary::DataRequestInstant > r = std::make_shared< Dictionary::DataRequestInstant >( true );

  r->appendDataSlice( result.data(), result.size() );
  return r;
}

/// If `fileName` is not an existing file, retries it relative to the config
/// directory (with the directory's last character chopped off) and, when
/// that exists, replaces `fileName` with the canonical path.
///
/// @return true only when `fileName` was rewritten; false when it already
///         named a file or could not be resolved.
bool ArticleMaker::adjustFilePath( QString & fileName )
{
  QFileInfo info( fileName );
  if( !info.isFile() )
  {
    QString dir = Config::getConfigDir();
    dir.chop( 1 );
    info.setFile( dir + fileName);
    if( info.isFile() )
    {
      fileName = info.canonicalFilePath();
      return true;
    }
  }
  return false;
}

//////// ArticleRequest

/// Starts an article lookup: emits the header immediately, then asks every
/// active dictionary for headwords that `word` is a synonym of. Bodies are
/// requested from altSearchFinished() once all of those searches are done.
///
/// @param sizeLimit  article size above which an article is rendered
///                   collapsed; negative disables collapsing by size.
ArticleRequest::ArticleRequest(
  Config::InputPhrase const & phrase, QString const & group_,
  QMap< QString, QString > const & contexts_,
  vector< sptr< Dictionary::Class > > const & activeDicts_,
  string const & header,
  int sizeLimit, bool needExpandOptionalParts_, bool ignoreDiacritics_ ):
    word( phrase.phrase ), group( group_ ), contexts( contexts_ ),
    activeDicts( activeDicts_ ),
    articleSizeLimit( sizeLimit ),
    needExpandOptionalParts( needExpandOptionalParts_ ),
    ignoreDiacritics( ignoreDiacritics_ )
{
  if ( !phrase.punctuationSuffix.isEmpty() )
    alts.insert( gd::toWString( phrase.phraseWithSuffix() ) );

  // No need to lock dataMutex on construction

  hasAnyData = true;

  // No cast needed: appendToData() below passes a const char * to
  // appendDataSlice() as well.
  appendDataSlice( header.data(), header.size() );

  //clear founded dicts.
  emit GlobalBroadcaster::instance()->dictionaryClear( ActiveDictIds{word} );

  // Accumulate main forms
  for( auto const & dict : activeDicts )
  {
    sptr< Dictionary::WordSearchRequest > s = dict->findHeadwordsForSynonym( gd::toWString( word ) );

    connect( s.get(), &Dictionary::Request::finished, this, &ArticleRequest::altSearchFinished, Qt::QueuedConnection );

    altSearches.push_back( s );
  }

  altSearchFinished(); // Handle any ones which have already finished
}

/// Collects the results of finished synonym searches into `alts`. When no
/// search is left, requests the article body from every active dictionary
/// and hands over to bodyFinished(). Does nothing once `altsDone` is set.
void ArticleRequest::altSearchFinished()
{
  if ( altsDone )
    return;

  // Check every request for finishing
  for( auto i = altSearches.begin(); i != altSearches.end(); )
  {
    if ( (*i)->isFinished() )
    {
      // This one's finished
      for( size_t count = (*i)->matchesCount(), x = 0; x < count; ++x )
        alts.insert( (**i)[ x ].word );

      i = altSearches.erase( i );
    }
    else
      ++i;
  }

  if ( altSearches.empty() )
  {
#ifdef QT_DEBUG
    qDebug( "alts finished" );
#endif

    // They all've finished! Now we can look up bodies

    altsDone = true; // So any pending signals in queued mode won't mess us up

    vector< wstring > altsVector( alts.begin(), alts.end() );

#ifdef QT_DEBUG
    for( auto const & alt : altsVector )
    {
      qDebug() << "Alt:" << gd::toQString( alt );
    }
#endif

    wstring wordStd = gd::toWString( word );

    if( activeDicts.size() <= 1 )
      articleSizeLimit = -1; // Don't collapse article if only one dictionary presented

    // NOTE(review): bodyFinished() maps each pending request back to its
    // dictionary by position (activeDicts.size() - bodyRequests.size()).
    // When getArticle() throws here, no request is queued for that
    // dictionary, so the positions no longer line up - worth confirming.
    for( auto const & dict : activeDicts )
    {
      try
      {
        sptr< Dictionary::DataRequest > r =
          dict->getArticle( wordStd, altsVector,
                            gd::toWString( contexts.value( QString::fromStdString( dict->getId() ) ) ),
                            ignoreDiacritics );

        connect( r.get(), &Dictionary::Request::finished, this, &ArticleRequest::bodyFinished, Qt::QueuedConnection );

        bodyRequests.push_back( r );
      }
      catch( std::exception & e )
      {
        gdWarning( "getArticle request error (%s) in \"%s\"\n",
                   e.what(), dict->getName().c_str() );
      }
    }

    bodyFinished(); // Handle any ones which have already finished
  }
}

/// Recursive helper for isCollapsable(): starting at `pos`, looks for the
/// end of the element that is open at that point, skipping nested ones.
///
/// @return a position past the matching close, or a value <= 0 when the
///         first search finds nothing (or finds a match at index 0).
///
/// NOTE(review): the search literal is empty in this copy; it presumably
/// held the closing tag text. The `+ 6` is the length that gets skipped.
int ArticleRequest::findEndOfCloseDiv( const QString &str, int pos )
{
  for( ; ; )
  {
    const int n1 = str.indexOf( "", pos );
    if( n1 <= 0 )
      return n1;

    // will there be some custom tags starts with
    // ,such as
    // NOTE(review): the comment above is truncated in this copy.
    const int n2 = str.indexOf( RX::Html::startDivTag, pos );
    if( n2 <= 0 || n2 > n1 )
      return n1 + 6;

    pos = findEndOfCloseDiv( str, n2 + 1 );
    if( pos <= 0 )
      return pos;
  }
}

/// Decides whether the article in `req` should be rendered collapsed.
///
/// @return true when `dictId` is in the global collapsed set, or when a size
///         limit is active and the article's plain-text size exceeds it
///         (after optional parts are stripped, if they are not expanded).
///         Any exception while measuring yields false.
bool ArticleRequest::isCollapsable( Dictionary::DataRequest & req ,QString const & dictId)
{
  if ( GlobalBroadcaster::instance()->collapsedDicts.contains( dictId ) )
    return true;

  bool collapse = false;

  if( articleSizeLimit >= 0 )
  {
    try
    {
      Mutex::Lock _( dataMutex );

      // Fetch the data once instead of once per argument.
      auto const & fullData = req.getFullData();
      QString text = QString::fromUtf8( fullData.data(), fullData.size() );

      if( !needExpandOptionalParts )
      {
        // Strip DSL optional parts
        // NOTE(review): the loop head below is damaged in this copy - the
        // search literal and the test on `pos` are missing. Reproduced as
        // found.
        for( ; ; )
        {
          const int pos = text.indexOf( "
0 ) {
            const int endPos = findEndOfCloseDiv( text, pos + 1 );
            if( endPos > pos)
              text.remove( pos, endPos - pos );
            else
              break;
          }
          else
            break;
        }
      }

      int size = htmlTextSize( text );
      if( size > articleSizeLimit )
        collapse = true;
    }
    catch(...)
    {
      // Best effort: when the size cannot be measured the article is simply
      // not collapsed.
    }
  }
  return collapse;
}

/// Emits finished article bodies in request order. For each finished request
/// with data or an error it writes the per-dictionary heading (collapsed or
/// not, optional Anki button, optional error text) followed by the body.
/// When no request is left it writes the footer and either finishes, or -
/// if nothing was found - starts the stemmed search and keeps the request
/// open. The ids of the dictionaries emitted in this call are broadcast via
/// GlobalBroadcaster::dictionaryChanges.
///
/// NOTE(review): the heading literals have lost their markup in this copy;
/// the .arg() calls still show which values they are meant to receive.
void ArticleRequest::bodyFinished()
{
  if ( bodyDone )
    return;

  GD_DPRINTF( "some body finished" );

  bool wasUpdated = false;

  QStringList dictIds;
  while ( bodyRequests.size() )
  {
    // Since requests should go in order, check the first one first
    if ( bodyRequests.front()->isFinished() )
    {
      // Good

      GD_DPRINTF( "one finished." );

      Dictionary::DataRequest & req = *bodyRequests.front();

      QString errorString = req.getErrorString();

      if ( req.dataSize() >= 0 || errorString.size() )
      {
        // Requests are consumed front to back, so the number still pending
        // gives the position of the current dictionary.
        sptr< Dictionary::Class > const & activeDict =
            activeDicts[ activeDicts.size() - bodyRequests.size() ];

        string dictId = activeDict->getId();

        dictIds << QString::fromStdString(dictId);
        string head;

        string gdFrom = "gdfrom-" + Html::escape( dictId );

        if ( closePrevSpan )
        {
          head += R"(
)";
        }

        bool collapse = isCollapsable( req, QString::fromStdString( dictId ) );

        string jsVal = Html::escapeForJavaScript( dictId );

        head += QString::fromUtf8( R"(
)" )
                  .arg( closePrevSpan ? "" : " gdactivearticle" ,
                        collapse ? " gdcollapsedarticle" : "" ,
                        gdFrom.c_str() ,
                        jsVal.c_str() )
                  .toStdString();

        closePrevSpan = true;

        head += QString::fromUtf8( R"(
%4 %5
)" )
                  .arg( dictId.c_str(),
                        collapse ? R"(style="cursor:pointer;")" : "",
                        collapse ? tr( "Expand article" ) : QString(),
                        Html::escape( tr( "From " ).toStdString() ).c_str(),
                        Html::escape( activeDict->getName() ).c_str(),
                        collapse ? "gdexpandicon" : "gdcollapseicon",
                        collapse ? "" : tr( "Collapse article" ) )
                  .toStdString();

        head += R"(
)";

        // If the user has enabled Anki integration in settings,
        // Show a (+) button that lets the user add a new Anki card.
        if ( ankiConnectEnabled() )
        {
          QString link{ R"EOF( )EOF" };
          head += link.arg( Html::escape( dictId ).c_str(), tr( "Make a new Anki note" ) ).toStdString();
        }

        head += QString::fromUtf8( R"(
)" )
                  .arg( LangCoder::intToCode2( activeDict->getLangFrom() ),
                        LangCoder::intToCode2( activeDict->getLangTo() ),
                        collapse ? "none" : "inline",
                        dictId.c_str() )
                  .toStdString();

        if( errorString.size() )
        {
          head += "
" + Html::escape( tr( "Query error: %1" ).arg( errorString ).toUtf8().data() ) + "
";
        }

        appendDataSlice( head.data(), head.size() );

        try
        {
          if( req.dataSize() > 0 )
          {
            // Bind by reference so the body is not copied before appending.
            auto const & body = req.getFullData();
            appendDataSlice( body.data(), body.size() );
          }
        }
        catch( std::exception & e )
        {
          gdWarning( "getDataSlice error: %s\n", e.what() );
        }

        wasUpdated = true;

        foundAnyDefinitions = true;
      }
      GD_DPRINTF( "erasing.." );
      bodyRequests.pop_front();
      GD_DPRINTF( "erase done.." );
    }
    else
    {
      GD_DPRINTF( "one not finished." );
      break;
    }
  }

  if ( bodyRequests.empty() )
  {
    // No requests left, end the article

    bodyDone = true;

    {
      string footer;

      if ( closePrevSpan )
      {
        footer += "
";
        closePrevSpan = false;
      }

      if ( !foundAnyDefinitions )
      {
        // No definitions were ever found, say so to the user.

        // Larger words are usually whole sentences - don't clutter the output
        // with their full bodies.
        footer += ArticleMaker::makeNotFoundBody( word.size() < 40 ? word : "", group );

        // When there were no definitions, we run stemmed search.
        stemmedWordFinder = std::make_shared< WordFinder >( this );

        connect( stemmedWordFinder.get(), &WordFinder::finished, this, &ArticleRequest::stemmedSearchFinished, Qt::QueuedConnection );

        stemmedWordFinder->stemmedMatch( word, activeDicts );
      }
      else
      {
        footer += "";
      }

      appendDataSlice( footer.data(), footer.size() );
    }

    if( stemmedWordFinder.get() )
    {
      update();
      qDebug() << "send dicts(stemmed):" << word << ":" << dictIds;
      emit GlobalBroadcaster::instance()->dictionaryChanges( ActiveDictIds{ word, dictIds } );
      dictIds.clear();
    }
    else
    {
      finish();
      qDebug() << "send dicts(finished):" << word << ":" << dictIds;
      emit GlobalBroadcaster::instance()->dictionaryChanges( ActiveDictIds{ word, dictIds } );
      dictIds.clear();
    }
  }
  else if( wasUpdated )
  {
    update();
    qDebug() << "send dicts(updated):" << word << ":" << dictIds;
    emit GlobalBroadcaster::instance()->dictionaryChanges( ActiveDictIds{ word, dictIds } );
    dictIds.clear();
  }
}

/// Estimates the visible text length of an HTML fragment: returns a fixed
/// 1000 when the first pattern matches, otherwise removes the matches of two
/// patterns and returns the length of the plain text Qt extracts.
///
/// NOTE(review): the first two patterns and the start of the third are
/// truncated in this copy; the comments suggest they target embedded
/// website content, stylesheets and scripts - confirm.
int ArticleRequest::htmlTextSize( QString html )
{
  // website dictionary.
  if( html.contains( QRegularExpression( "]*>", QRegularExpression::CaseInsensitiveOption ) ) )
  {
    //arbitary number;
    return 1000;
  }

  //https://bugreports.qt.io/browse/QTBUG-102757
  QString stripStyleSheet =
    html.remove( QRegularExpression( "]*>", QRegularExpression::CaseInsensitiveOption ) )
      .remove( QRegularExpression( R"([\s\S]*?<\/script>)",
                                   QRegularExpression::CaseInsensitiveOption|QRegularExpression::MultilineOption ) );
  int size = QTextDocumentFragment::fromHtml( stripStyleSheet ).toPlainText().length();

  return size;
}

/// Handles the result of the stemmed search started by bodyFinished():
/// appends a "Close words" list when there are matches, then - if the query
/// consists of more than one word - switches the finder's signal over to
/// individualWordFinished() and starts the compound search; otherwise
/// closes the page and finishes.
void ArticleRequest::stemmedSearchFinished()
{
  // Got stemmed matching results

  WordFinder::SearchResults sr = stemmedWordFinder->getResults();

  string footer;

  bool continueMatching = false;

  if ( sr.size() )
  {
    footer += R"(
)" +
      Html::escape( tr( "Close words: " ).toUtf8().data() ) +
      "";

    for( unsigned x = 0; x < sr.size(); ++x )
    {
      footer += linkWord( sr[ x ].first );

      if ( x != sr.size() - 1 )
      {
        footer += ", ";
      }
    }

    footer += "
";
  }

  splittedWords = splitIntoWords( word );

  if ( splittedWords.first.size() > 1 ) // Contains more than one word
  {
    disconnect( stemmedWordFinder.get(), &WordFinder::finished, this, &ArticleRequest::stemmedSearchFinished );

    connect( stemmedWordFinder.get(), &WordFinder::finished, this, &ArticleRequest::individualWordFinished, Qt::QueuedConnection );

    currentSplittedWordStart = -1;
    currentSplittedWordEnd = currentSplittedWordStart;

    firstCompoundWasFound = false;

    compoundSearchNextStep( false );

    continueMatching = true;
  }

  if ( !continueMatching )
    footer += "";

  {
    appendDataSlice( footer.data(), footer.size() );
  }

  if( continueMatching )
    update();
  else
    finish();
}

/// Advances the compound-expression search by one step.
///
/// @param lastSearchSucceeded  result of the previous lookup; on failure the
///        last good compound (if any) is emitted as a link before moving on.
///
/// NOTE(review): a large part of this function is missing in this copy -
/// everything between the "Individual words" literal and the
/// expressionMatch() call. It is reproduced as found and will not compile
/// until restored.
void ArticleRequest::compoundSearchNextStep( bool lastSearchSucceeded )
{
  if ( !lastSearchSucceeded )
  {
    // Last search was unsuccessful. First, emit what we had.

    string footer;

    if ( lastGoodCompoundResult.size() ) // We have something to append
    {
//      GD_DPRINTF( "Appending\n" );

      if ( !firstCompoundWasFound )
      {
        // Append the beginning
        footer += R"(
)" +
          Html::escape( tr( "Compound expressions: " ).toUtf8().data() ) +
          "";

        firstCompoundWasFound = true;
      }
      else
      {
        // Append the separator
        footer += " / ";
      }

      footer += linkWord( lastGoodCompoundResult );

      lastGoodCompoundResult.clear();
    }

    // Then, start a new search for the next word, if possible

    if ( currentSplittedWordStart >= splittedWords.first.size() - 2 )
    {
      // The last word was the last possible to start from

      if ( firstCompoundWasFound )
        footer += "";

      // Now add links to all the individual words. They conclude the result.

      footer += R"(
)" +
        Html::escape( tr( "Individual words: " ).toUtf8().data() ) +
        "expressionMatch( currentSplittedWordCompound, activeDicts, 40, // Would one be enough? Leave 40 to be safe.
                                      Dictionary::SuitableForCompoundSearching );
}

/// Joins the words from currentSplittedWordStart to currentSplittedWordEnd
/// (inclusive), placing the whitespace-normalized spacing that follows each
/// word between it and the next one.
QString ArticleRequest::makeSplittedWordCompound()
{
  QString result;

  for( int x = currentSplittedWordStart; x <= currentSplittedWordEnd; ++x )
  {
    result.append( splittedWords.first[ x ] );

    if ( x < currentSplittedWordEnd )
    {
      wstring ws( gd::toWString( splittedWords.second[ x + 1 ] ) );

      Folding::normalizeWhitespace( ws );

      result.append( gd::toQString( ws ) );
    }
  }

  return result;
}

/// Evaluates the finder's results for the current compound. The step counts
/// as successful when a result is flagged in `.second`, or when a result
/// begins with the case-folded compound; the compound is remembered as the
/// last good one only for a flagged result or an exact-length match.
void ArticleRequest::individualWordFinished()
{
  WordFinder::SearchResults const & results = stemmedWordFinder->getResults();

  if ( results.size() )
  {
    wstring source = Folding::applySimpleCaseOnly( gd::toWString( currentSplittedWordCompound ) );

    bool hadSomething = false;

    for( auto const & match : results )
    {
      if ( match.second )
      {
        // Spelling suggestion match found. No need to continue.
        hadSomething = true;
        lastGoodCompoundResult = currentSplittedWordCompound;
        break;
      }

      // Prefix match found. Check if the aliases are acceptable.

      wstring result( Folding::applySimpleCaseOnly( gd::toWString( match.first ) ) );

      if ( source.size() <= result.size() && result.compare( 0, source.size(), source ) == 0 )
      {
        // The resulting string begins with the source one

        hadSomething = true;

        if ( source.size() == result.size() )
        {
          // Got the match. No need to continue.
          lastGoodCompoundResult = currentSplittedWordCompound;
          break;
        }
      }
    }

    if ( hadSomething )
    {
      compoundSearchNextStep( true );
      return;
    }
  }

  compoundSearchNextStep( false );
}

/// Appends the bytes of `str` to the request's data.
void ArticleRequest::appendToData( std::string const & str )
{
  appendDataSlice( str.data(), str.size() );
}

/// Splits `input` into words and the runs of punctuation/whitespace around
/// them.
///
/// @return `.first` holds the words; `.second` holds one spacing entry
///         before each word plus a trailing one, so it is always one element
///         longer than `.first`.
QPair< ArticleRequest::Words, ArticleRequest::Spacings > ArticleRequest::splitIntoWords( QString const & input )
{
  QPair< Words, Spacings > result;

  auto const isSeparator = []( QChar const & ch )
  {
    return Folding::isPunct( ch.unicode() ) || Folding::isWhitespace( ch.unicode() );
  };

  // The scan stops at the first character whose unicode() is 0.
  QChar const * ptr = input.data();

  for( ; ; )
  {
    QString spacing;

    for( ; ptr->unicode() && isSeparator( *ptr ); ++ptr )
      spacing.append( *ptr );

    result.second.append( spacing );

    QString word;

    for( ; ptr->unicode() && !isSeparator( *ptr ); ++ptr )
      word.append( *ptr );

    if ( word.isEmpty() )
      break;

    result.first.append( word );
  }

  return result;
}

/// Builds a gdlookup://localhost/<str> URL and returns the HTML-escaped word.
///
/// NOTE(review): the return expression is truncated in this copy - the part
/// that presumably wrapped the word in a link to `url` is missing and the
/// parentheses no longer balance. Reproduced as found.
string ArticleRequest::linkWord( QString const & str )
{
  QUrl url;

  url.setScheme( "gdlookup" );
  url.setHost( "localhost" );
  url.setPath( Utils::Url::ensureLeadingSlash( str ) );

  string escapedResult = Html::escape( str.toUtf8().data() );
  return string( "" + escapedResult +"";
}

/// HTML-escapes `str` and replaces each "\n" in the result.
///
/// NOTE(review): the replacement literal has lost its content in this copy.
std::string ArticleRequest::escapeSpacing( QString const & str )
{
  QByteArray spacing = Html::escape( str.toUtf8().data() ).c_str();

  spacing.replace( "\n", "
" );

  return spacing.data();
}

/// Cancels every pending synonym search, body request and the stemmed
/// finder (if any), then marks this request finished. No-op when the
/// request has already finished.
void ArticleRequest::cancel()
{
  if( isFinished() )
    return;

  for( auto const & search : altSearches )
    search->cancel();

  for( auto const & body : bodyRequests )
    body->cancel();

  if( stemmedWordFinder.get() )
    stemmedWordFinder->cancel();

  finish();
}