/* This file is (c) 2008-2012 Konstantin Isakov <ikm@goldendict.org>
 * Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */

#include "sdict.hh"
#include "btreeidx.hh"
#include "folding.hh"
#include "utf8.hh"
#include "chunkedstorage.hh"
#include "langcoder.hh"
#include "gddebug.hh"
#include "decompress.hh"
#include "htmlescape.hh"
#include "ftshelpers.hh"

#include <map>
#include <set>
#include <string>

#ifdef _MSC_VER
  #include <stub_msvc.h>
#endif

#include <QIcon>
#include <QString>
#include <QtConcurrent>

#if ( QT_VERSION >= QT_VERSION_CHECK( 6, 0, 0 ) )
  #include <QtCore5Compat>
#endif

#include <QRegularExpression>

#include "utils.hh"

namespace Sdict {

using std::map;
using std::multimap;
using std::pair;
using std::set;
using std::string;
using gd::wstring;

using BtreeIndexing::WordArticleLink;
using BtreeIndexing::IndexedWords;
using BtreeIndexing::IndexInfo;

namespace {

DEF_EX_STR( exNotDctFile, "Not an Sdictionary file", Dictionary::Ex )
using Dictionary::exCantReadFile;
DEF_EX_STR( exWordIsTooLarge, "Encountered a word that is too large:", Dictionary::Ex )
DEF_EX_STR( exSuddenEndOfFile, "Sudden end of file", Dictionary::Ex )

#pragma pack( push, 1 )

/// DCT file header
struct DCT_header
{
  char signature[ 4 ];
  char inputLang[ 3 ];
  char outputLang[ 3 ];
  uint8_t compression;
  uint32_t wordCount;
  uint32_t shortIndexLength;
  uint32_t titleOffset;
  uint32_t copyrightOffset;
  uint32_t versionOffset;
  uint32_t shortIndexOffset;
  uint32_t fullIndexOffset;
  uint32_t articlesOffset;
}
#ifndef _MSC_VER
__attribute__( ( packed ) )
#endif
;
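
// Rough layout of a .dct container, as implied by the header fields above and
// by the reading code in getDescription() and makeDictionaries() below: the
// title, copyright and version records each start with a uint32_t size and are
// stored with the codec selected by the low nibble of `compression`
// (0 - none, 1 - zlib, 2 - bzip2); the full index at fullIndexOffset holds
// wordCount IndexElement records with their headwords; article bodies live at
// articlesOffset plus each record's articleOffset.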

struct IndexElement
{
  uint16_t nextWord;
  uint16_t previousWord;
  uint32_t articleOffset;
}
#ifndef _MSC_VER
__attribute__( ( packed ) )
#endif
;

enum {
  Signature            = 0x43494453, // SDIC on little-endian, CIDS on big-endian
  CurrentFormatVersion = 1 + BtreeIndexing::FormatVersion + Folding::Version
};

struct IdxHeader
{
  uint32_t signature;             // First comes the signature, SDIC
  uint32_t formatVersion;         // File format version (CurrentFormatVersion)
  uint32_t chunksOffset;          // The offset to chunks' storage
  uint32_t indexBtreeMaxElements; // Two fields from IndexInfo
  uint32_t indexRootOffset;
  uint32_t wordCount;
  uint32_t articleCount;
  uint32_t compressionType; // Data compression in file: 0 - none, 1 - zlib, 2 - bzip2
  uint32_t langFrom;        // Source language
  uint32_t langTo;          // Target language
}
#ifndef _MSC_VER
__attribute__( ( packed ) )
#endif
;

#pragma pack( pop )

bool indexIsOldOrBad( string const & indexFile )
{
  File::Class idx( indexFile, "rb" );

  IdxHeader header;

  return idx.readRecords( &header, sizeof( header ), 1 ) != 1 || header.signature != Signature
    || header.formatVersion != CurrentFormatVersion;
}

class SdictDictionary: public BtreeIndexing::BtreeDictionary
{
  QMutex idxMutex, sdictMutex;
  File::Class idx;
  IdxHeader idxHeader;
  ChunkedStorage::Reader chunks;
  File::Class df;

public:

  SdictDictionary( string const & id, string const & indexFile, vector< string > const & dictionaryFiles );

  ~SdictDictionary();

  string getName() noexcept override
  {
    return dictionaryName;
  }

  map< Dictionary::Property, string > getProperties() noexcept override
  {
    return map< Dictionary::Property, string >();
  }

  unsigned long getArticleCount() noexcept override
  {
    return idxHeader.articleCount;
  }

  unsigned long getWordCount() noexcept override
  {
    return idxHeader.wordCount;
  }

  inline quint32 getLangFrom() const override
  {
    return idxHeader.langFrom;
  }

  inline quint32 getLangTo() const override
  {
    return idxHeader.langTo;
  }

  sptr< Dictionary::DataRequest >
  getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ignoreDiacritics ) override;

  QString const & getDescription() override;

  sptr< Dictionary::DataRequest >
  getSearchResults( QString const & searchString, int searchMode, bool matchCase, bool ignoreDiacritics ) override;

  void getArticleText( uint32_t articleAddress, QString & headword, QString & text ) override;

  void makeFTSIndex( QAtomicInt & isCancelled, bool firstIteration ) override;

  void setFTSParameters( Config::FullTextSearch const & fts ) override
  {
    can_FTS = enable_FTS && fts.enabled && !fts.disabledTypes.contains( "SDICT", Qt::CaseInsensitive )
      && ( fts.maxDictionarySize == 0 || getArticleCount() <= fts.maxDictionarySize );
  }

protected:

  void loadIcon() noexcept override;

private:

  /// Loads the article.
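  /// An article record in the .dct file is a uint32_t size prefix followed by
  /// the article body, which is stored raw, zlib- or bzip2-compressed
  /// according to idxHeader.compressionType; the decoded text is then run
  /// through convert() to produce HTML.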
  void loadArticle( uint32_t address, string & articleText );
  string convert( string const & in_data );

  friend class SdictArticleRequest;
};

SdictDictionary::SdictDictionary( string const & id,
                                  string const & indexFile,
                                  vector< string > const & dictionaryFiles ):
  BtreeDictionary( id, dictionaryFiles ),
  idx( indexFile, "rb" ),
  idxHeader( idx.read< IdxHeader >() ),
  chunks( idx, idxHeader.chunksOffset ),
  df( dictionaryFiles[ 0 ], "rb" )
{
  // Read dictionary name

  idx.seek( sizeof( idxHeader ) );

  vector< char > dName( idx.read< uint32_t >() );
  if ( dName.size() > 0 ) {
    idx.read( &dName.front(), dName.size() );
    dictionaryName = string( &dName.front(), dName.size() );
  }

  // Initialize the index

  openIndex( IndexInfo( idxHeader.indexBtreeMaxElements, idxHeader.indexRootOffset ), idx, idxMutex );

  // Full-text search parameters

  ftsIdxName = indexFile + Dictionary::getFtsSuffix();
}

SdictDictionary::~SdictDictionary()
{
  df.close();
}

void SdictDictionary::loadIcon() noexcept
{
  if ( dictionaryIconLoaded )
    return;

  QString fileName = QDir::fromNativeSeparators( getDictionaryFilenames()[ 0 ].c_str() );

  // Remove the extension
  fileName.chop( 3 );

  if ( !loadIconFromFile( fileName ) ) {
    // Load failed -- use default icons
    dictionaryIcon = QIcon( ":/icons/icon32_sdict.png" );
  }

  dictionaryIconLoaded = true;
}
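
// convert() rewrites Sdictionary source markup into HTML for the article view:
// newlines become <br/> and leading spaces become &nbsp;, the dictionary's own
// <t>..</t> and <f>..</f> tags are mapped to styled spans, <l>..</l> to an
// indented line, and <r>..</r> cross-references to "bword:" hyperlinks that
// GoldenDict resolves as word lookups.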
" ); break; case ' ': if ( afterEol ) { inConverted.append( " " ); break; } // Fall-through default: inConverted.push_back( i ); afterEol = false; } } QString result = QString::fromUtf8( inConverted.c_str(), inConverted.size() ); result.replace( QRegularExpression( "<\\s*(p|br)\\s*>", QRegularExpression::CaseInsensitiveOption ), "
" ); result.remove( QRegularExpression( "<\\s*/p\\s*>", QRegularExpression::CaseInsensitiveOption ) ); result.replace( QRegularExpression( "<\\s*t\\s*>", QRegularExpression::CaseInsensitiveOption ), R"()" ); result.replace( QRegularExpression( "<\\s*f\\s*>", QRegularExpression::CaseInsensitiveOption ), "" ); result.replace( QRegularExpression( "<\\s*/(t|f)\\s*>", QRegularExpression::CaseInsensitiveOption ), "" ); result.replace( QRegularExpression( "<\\s*l\\s*>", QRegularExpression::CaseInsensitiveOption ), "
    " ); result.replace( QRegularExpression( "<\\s*/l\\s*>", QRegularExpression::CaseInsensitiveOption ), "
" ); // Links handling int n = 0; for ( ;; ) { QRegularExpression start_link_tag( "<\\s*r\\s*>", QRegularExpression::CaseInsensitiveOption ); QRegularExpression end_link_tag( "<\\s*/r\\s*>", QRegularExpression::CaseInsensitiveOption ); n = result.indexOf( start_link_tag, n ); if ( n < 0 ) break; int end = result.indexOf( end_link_tag, n ); if ( end < 0 ) break; QRegularExpressionMatch m = start_link_tag.match( result, 0, QRegularExpression::PartialPreferFirstMatch ); int tag_len = m.captured().length(); QString link_text = result.mid( n + tag_len, end - n - tag_len ); m = end_link_tag.match( result, 0, QRegularExpression::PartialPreferFirstMatch ); result.replace( end, m.captured().length(), "" ); result.replace( n, tag_len, QString( R"(" ); } // Adjust text direction for lines n = 0; bool b = true; while ( b ) { int next = result.indexOf( "
", n ); if ( next < 0 ) { next = result.length(); b = false; } if ( !result.mid( n, next - n ).contains( '<' ) ) { if ( Html::unescape( result.mid( n, next - n ) ).isRightToLeft() != isToLanguageRTL() ) { result.insert( next, "
" ); result.insert( n, QString( "" ); next = result.indexOf( "
", n ); } } n = next + 5; } return result.toUtf8().data(); } void SdictDictionary::loadArticle( uint32_t address, string & articleText ) { uint32_t articleOffset = address; uint32_t articleSize; vector< char > articleBody; { QMutexLocker _( &sdictMutex ); df.seek( articleOffset ); df.read( &articleSize, sizeof( articleSize ) ); articleBody.resize( articleSize ); df.read( &articleBody.front(), articleSize ); } if ( articleBody.empty() ) throw exCantReadFile( getDictionaryFilenames()[ 0 ] ); if ( idxHeader.compressionType == 1 ) articleText = decompressZlib( articleBody.data(), articleSize ); else if ( idxHeader.compressionType == 2 ) articleText = decompressBzip2( articleBody.data(), articleSize ); else articleText = string( articleBody.data(), articleSize ); articleText = convert( articleText ); string div = "
" ); } void SdictDictionary::makeFTSIndex( QAtomicInt & isCancelled, bool firstIteration ) { if ( !( Dictionary::needToRebuildIndex( getDictionaryFilenames(), ftsIdxName ) || FtsHelpers::ftsIndexIsOldOrBad( this ) ) ) FTS_index_completed.ref(); if ( haveFTSIndex() ) return; if ( ensureInitDone().size() ) return; if ( firstIteration && getArticleCount() > FTS::MaxDictionarySizeForFastSearch ) return; gdDebug( "SDict: Building the full-text index for dictionary: %s\n", getName().c_str() ); try { FtsHelpers::makeFTSIndex( this, isCancelled ); FTS_index_completed.ref(); } catch ( std::exception & ex ) { gdWarning( "SDict: Failed building full-text search index for \"%s\", reason: %s\n", getName().c_str(), ex.what() ); QFile::remove( ftsIdxName.c_str() ); } } void SdictDictionary::getArticleText( uint32_t articleAddress, QString & headword, QString & text ) { try { string articleStr; headword.clear(); text.clear(); loadArticle( articleAddress, articleStr ); try { text = Html::unescape( QString::fromStdString( articleStr ) ); } catch ( std::exception & ) { } } catch ( std::exception & ex ) { gdWarning( "SDict: Failed retrieving article from \"%s\", reason: %s\n", getName().c_str(), ex.what() ); } } sptr< Dictionary::DataRequest > SdictDictionary::getSearchResults( QString const & searchString, int searchMode, bool matchCase, bool ignoreDiacritics ) { return std::make_shared< FtsHelpers::FTSResultsRequest >( *this, searchString, searchMode, matchCase, ignoreDiacritics ); } /// SdictDictionary::getArticle() class SdictArticleRequest: public Dictionary::DataRequest { wstring word; vector< wstring > alts; SdictDictionary & dict; bool ignoreDiacritics; QAtomicInt isCancelled; QFuture< void > f; public: SdictArticleRequest( wstring const & word_, vector< wstring > const & alts_, SdictDictionary & dict_, bool ignoreDiacritics_ ): word( word_ ), alts( alts_ ), dict( dict_ ), ignoreDiacritics( ignoreDiacritics_ ) { f = QtConcurrent::run( [ this ]() { this->run(); } ); } void run(); void cancel() override { isCancelled.ref(); } ~SdictArticleRequest() { isCancelled.ref(); f.waitForFinished(); } }; void SdictArticleRequest::run() { if ( Utils::AtomicInt::loadAcquire( isCancelled ) ) { finish(); return; } vector< WordArticleLink > chain = dict.findArticles( word, ignoreDiacritics ); for ( const auto & alt : alts ) { /// Make an additional query for each alt vector< WordArticleLink > altChain = dict.findArticles( alt, ignoreDiacritics ); chain.insert( chain.end(), altChain.begin(), altChain.end() ); } multimap< wstring, pair< string, string > > mainArticles, alternateArticles; set< uint32_t > articlesIncluded; // Some synonims make it that the articles // appear several times. We combat this // by only allowing them to appear once. wstring wordCaseFolded = Folding::applySimpleCaseOnly( word ); if ( ignoreDiacritics ) wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded ); for ( auto & x : chain ) { if ( Utils::AtomicInt::loadAcquire( isCancelled ) ) { finish(); return; } if ( articlesIncluded.find( x.articleOffset ) != articlesIncluded.end() ) continue; // We already have this article in the body. // Now grab that article string headword, articleText; headword = x.word; try { dict.loadArticle( x.articleOffset, articleText ); // Ok. Now, does it go to main articles, or to alternate ones? We list // main ones first, and alternates after. // We do the case-folded comparison here. 
/// SdictDictionary::getArticle()

class SdictArticleRequest: public Dictionary::DataRequest
{
  wstring word;
  vector< wstring > alts;
  SdictDictionary & dict;
  bool ignoreDiacritics;

  QAtomicInt isCancelled;
  QFuture< void > f;

public:

  SdictArticleRequest( wstring const & word_,
                       vector< wstring > const & alts_,
                       SdictDictionary & dict_,
                       bool ignoreDiacritics_ ):
    word( word_ ),
    alts( alts_ ),
    dict( dict_ ),
    ignoreDiacritics( ignoreDiacritics_ )
  {
    f = QtConcurrent::run( [ this ]() {
      this->run();
    } );
  }

  void run();

  void cancel() override
  {
    isCancelled.ref();
  }

  ~SdictArticleRequest()
  {
    isCancelled.ref();
    f.waitForFinished();
  }
};

void SdictArticleRequest::run()
{
  if ( Utils::AtomicInt::loadAcquire( isCancelled ) ) {
    finish();
    return;
  }

  vector< WordArticleLink > chain = dict.findArticles( word, ignoreDiacritics );

  for ( const auto & alt : alts ) {
    /// Make an additional query for each alt
    vector< WordArticleLink > altChain = dict.findArticles( alt, ignoreDiacritics );
    chain.insert( chain.end(), altChain.begin(), altChain.end() );
  }

  multimap< wstring, pair< string, string > > mainArticles, alternateArticles;

  set< uint32_t > articlesIncluded; // Some synonyms make the same article
                                    // appear several times. We combat this
                                    // by only allowing it to appear once.

  wstring wordCaseFolded = Folding::applySimpleCaseOnly( word );
  if ( ignoreDiacritics )
    wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded );

  for ( auto & x : chain ) {
    if ( Utils::AtomicInt::loadAcquire( isCancelled ) ) {
      finish();
      return;
    }

    if ( articlesIncluded.find( x.articleOffset ) != articlesIncluded.end() )
      continue; // We already have this article in the body.

    // Now grab that article

    string headword, articleText;

    headword = x.word;

    try {
      dict.loadArticle( x.articleOffset, articleText );

      // Ok. Now, does it go to main articles, or to alternate ones? We list
      // main ones first, and alternates after.

      // We do the case-folded comparison here.

      wstring headwordStripped = Folding::applySimpleCaseOnly( headword );
      if ( ignoreDiacritics )
        headwordStripped = Folding::applyDiacriticsOnly( headwordStripped );

      multimap< wstring, pair< string, string > > & mapToUse =
        ( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles;

      mapToUse.insert( pair( Folding::applySimpleCaseOnly( headword ), pair( headword, articleText ) ) );

      articlesIncluded.insert( x.articleOffset );
    }
    catch ( std::exception & ex ) {
      gdWarning( "SDict: Failed loading article from \"%s\", reason: %s\n", dict.getName().c_str(), ex.what() );
    }
  }

  if ( mainArticles.empty() && alternateArticles.empty() ) {
    // No such word
    finish();
    return;
  }

  string result;

  multimap< wstring, pair< string, string > >::const_iterator i;

  for ( i = mainArticles.begin(); i != mainArticles.end(); ++i ) {
    result += dict.isFromLanguageRTL() ? "<h3 dir=\"rtl\">" : "<h3>";
    result += i->second.first;
    result += "</h3>";
    result += i->second.second;
  }
"; if ( dict.isToLanguageRTL() ) result += ""; result += i->second.second; if ( dict.isToLanguageRTL() ) result += ""; } appendString( result ); hasAnyData = true; finish(); } sptr< Dictionary::DataRequest > SdictDictionary::getArticle( wstring const & word, vector< wstring > const & alts, wstring const &, bool ignoreDiacritics ) { return std::make_shared< SdictArticleRequest >( word, alts, *this, ignoreDiacritics ); } QString const & SdictDictionary::getDescription() { if ( !dictionaryDescription.isEmpty() ) return dictionaryDescription; dictionaryDescription = QObject::tr( "Title: %1%2" ).arg( QString::fromUtf8( getName().c_str() ) ).arg( "\n\n" ); try { QMutexLocker _( &sdictMutex ); DCT_header dictHeader; df.seek( 0 ); if ( df.readRecords( &dictHeader, sizeof( dictHeader ), 1 ) != 1 ) throw exCantReadFile( getDictionaryFilenames()[ 0 ] ); int compression = dictHeader.compression & 0x0F; vector< char > data; uint32_t size; string str; df.seek( dictHeader.copyrightOffset ); df.read( &size, sizeof( size ) ); data.resize( size ); df.read( &data.front(), size ); if ( compression == 1 ) str = decompressZlib( data.data(), size ); else if ( compression == 2 ) str = decompressBzip2( data.data(), size ); else str = string( data.data(), size ); dictionaryDescription += QObject::tr( "Copyright: %1%2" ).arg( QString::fromUtf8( str.c_str(), str.size() ) ).arg( "\n\n" ); df.seek( dictHeader.versionOffset ); df.read( &size, sizeof( size ) ); data.resize( size ); df.read( &data.front(), size ); if ( compression == 1 ) str = decompressZlib( data.data(), size ); else if ( compression == 2 ) str = decompressBzip2( data.data(), size ); else str = string( data.data(), size ); dictionaryDescription += QObject::tr( "Version: %1%2" ).arg( QString::fromUtf8( str.c_str(), str.size() ) ).arg( "\n\n" ); } catch ( std::exception & ex ) { gdWarning( "SDict: Failed description reading for \"%s\", reason: %s\n", getName().c_str(), ex.what() ); } if ( dictionaryDescription.isEmpty() ) dictionaryDescription = "NONE"; return dictionaryDescription; } } // anonymous namespace vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & fileNames, string const & indicesDir, Dictionary::Initializing & initializing ) { vector< sptr< Dictionary::Class > > dictionaries; for ( const auto & fileName : fileNames ) { // Skip files with the extensions different to .dct to speed up the // scanning if ( !Utils::endsWithIgnoreCase( fileName, ".dct" ) ) continue; // Got the file -- check if we need to rebuid the index vector< string > dictFiles( 1, fileName ); string dictId = Dictionary::makeDictionaryId( dictFiles ); string indexFile = indicesDir + dictId; if ( Dictionary::needToRebuildIndex( dictFiles, indexFile ) || indexIsOldOrBad( indexFile ) ) { try { gdDebug( "SDict: Building the index for dictionary: %s\n", fileName.c_str() ); File::Class df( fileName, "rb" ); DCT_header dictHeader; df.read( &dictHeader, sizeof( dictHeader ) ); if ( strncmp( dictHeader.signature, "sdct", 4 ) ) { gdWarning( "File \"%s\" is not valid SDictionary file", fileName.c_str() ); continue; } int compression = dictHeader.compression & 0x0F; vector< char > data; uint32_t size; df.seek( dictHeader.titleOffset ); df.read( &size, sizeof( size ) ); data.resize( size ); df.read( &data.front(), size ); string dictName; if ( compression == 1 ) dictName = decompressZlib( data.data(), size ); else if ( compression == 2 ) dictName = decompressBzip2( data.data(), size ); else dictName = string( data.data(), size ); initializing.indexingDictionary( 
dictName ); File::Class idx( indexFile, "wb" ); IdxHeader idxHeader; memset( &idxHeader, 0, sizeof( idxHeader ) ); // We write a dummy header first. At the end of the process the header // will be rewritten with the right values. idx.write( idxHeader ); idx.write( (uint32_t)dictName.size() ); idx.write( dictName.data(), dictName.size() ); IndexedWords indexedWords; ChunkedStorage::Writer chunks( idx ); uint32_t wordCount = 0; set< uint32_t > articleOffsets; uint32_t pos = dictHeader.fullIndexOffset; for ( uint32_t j = 0; j < dictHeader.wordCount; j++ ) { IndexElement el; df.seek( pos ); df.read( &el, sizeof( el ) ); uint32_t articleOffset = dictHeader.articlesOffset + el.articleOffset; size = el.nextWord - sizeof( el ); if ( el.nextWord < sizeof( el ) ) break; wordCount++; data.resize( size ); df.read( &data.front(), size ); if ( articleOffsets.find( articleOffset ) == articleOffsets.end() ) articleOffsets.insert( articleOffset ); // Insert new entry indexedWords.addWord( Utf8::decode( string( data.data(), size ) ), articleOffset ); pos += el.nextWord; } // Finish with the chunks idxHeader.chunksOffset = chunks.finish(); // Build index IndexInfo idxInfo = BtreeIndexing::buildIndex( indexedWords, idx ); idxHeader.indexBtreeMaxElements = idxInfo.btreeMaxElements; idxHeader.indexRootOffset = idxInfo.rootOffset; indexedWords.clear(); // Release memory -- no need for this data // That concludes it. Update the header. idxHeader.signature = Signature; idxHeader.formatVersion = CurrentFormatVersion; idxHeader.articleCount = articleOffsets.size(); idxHeader.wordCount = wordCount; idxHeader.langFrom = LangCoder::code2toInt( dictHeader.inputLang ); idxHeader.langTo = LangCoder::code2toInt( dictHeader.outputLang ); idxHeader.compressionType = compression; idx.rewind(); idx.write( &idxHeader, sizeof( idxHeader ) ); } catch ( std::exception & e ) { gdWarning( "Sdictionary dictionary indexing failed: %s, error: %s\n", fileName.c_str(), e.what() ); continue; } catch ( ... ) { qWarning( "Sdictionary dictionary indexing failed\n" ); continue; } } // if need to rebuild try { dictionaries.push_back( std::make_shared< SdictDictionary >( dictId, indexFile, dictFiles ) ); } catch ( std::exception & e ) { gdWarning( "Sdictionary dictionary initializing failed: %s, error: %s\n", fileName.c_str(), e.what() ); } } return dictionaries; } } // namespace Sdict