"; articleText += "

"; if( articleText.compare( articleText.size() - 4, 4, "

" ) == 0 ) articleText.insert( articleText.size() - 4, " " + button ); else articleText += button; } articleText += articleAfter; } catch( std::exception &ex ) { gdWarning( "DSL: Failed loading article from \"%s\", reason: %s\n", dict.getName().c_str(), ex.what() ); articleText = string( "" ) + QObject::tr( "Article loading error" ).toStdString() + ""; } Mutex::Lock _( dataMutex ); data.resize( data.size() + articleText.size() ); memcpy( &data.front() + data.size() - articleText.size(), articleText.data(), articleText.size() ); hasAnyData = true; } finish(); } sptr< Dictionary::DataRequest > DslDictionary::getArticle( wstring const & word, vector< wstring > const & alts, wstring const &, bool ignoreDiacritics ) { return new DslArticleRequest( word, alts, *this, ignoreDiacritics ); } //// DslDictionary::getResource() class DslResourceRequest: public Dictionary::DataRequest { DslDictionary & dict; string resourceName; QAtomicInt isCancelled; QSemaphore hasExited; public: DslResourceRequest( DslDictionary & dict_, string const & resourceName_ ): dict( dict_ ), resourceName( resourceName_ ) { QThreadPool::globalInstance()->start( [ this ]() { this->run(); } ); } void run(); virtual void cancel() { isCancelled.ref(); } ~DslResourceRequest() { isCancelled.ref(); //hasExited.acquire(); } }; void DslResourceRequest::run() { // Some runnables linger enough that they are cancelled before they start if ( Utils::AtomicInt::loadAcquire( isCancelled ) ) { finish(); return; } if ( dict.ensureInitDone().size() ) { setErrorString( QString::fromUtf8( dict.ensureInitDone().c_str() ) ); finish(); return; } string n = FsEncoding::dirname( dict.getDictionaryFilenames()[ 0 ] ) + FsEncoding::separator() + FsEncoding::encode( resourceName ); GD_DPRINTF( "n is %s\n", n.c_str() ); try { try { Mutex::Lock _( dataMutex ); File::loadFromFile( n, data ); } catch( File::exCantOpen & ) { n = dict.getResourceDir1() + FsEncoding::encode( resourceName ); try { Mutex::Lock _( dataMutex ); File::loadFromFile( n, data ); } catch( File::exCantOpen & ) { n = dict.getResourceDir2() + FsEncoding::encode( resourceName ); try { Mutex::Lock _( dataMutex ); File::loadFromFile( n, data ); } catch( File::exCantOpen & ) { // Try reading from zip file if ( dict.resourceZip.isOpen() ) { Mutex::Lock _( dict.resourceZipMutex ); Mutex::Lock __( dataMutex ); if ( !dict.resourceZip.loadFile( Utf8::decode( resourceName ), data ) ) throw; // Make it fail since we couldn't read the archive } else throw; } } } if ( Filetype::isNameOfTiff( resourceName ) ) { // Convert it Mutex::Lock _( dataMutex ); GdTiff::tiff2img( data ); } Mutex::Lock _( dataMutex ); hasAnyData = true; } catch( std::exception &ex ) { gdWarning( "DSL: Failed loading resource \"%s\" for \"%s\", reason: %s\n", resourceName.c_str(), dict.getName().c_str(), ex.what() ); // Resource not loaded -- we don't set the hasAnyData flag then } finish(); } sptr< Dictionary::DataRequest > DslDictionary::getResource( string const & name ) { return new DslResourceRequest( *this, name ); } sptr< Dictionary::DataRequest > DslDictionary::getSearchResults( QString const & searchString, int searchMode, bool matchCase, int distanceBetweenWords, int maxResults, bool ignoreWordsOrder, bool ignoreDiacritics ) { return new FtsHelpers::FTSResultsRequest( *this, searchString,searchMode, matchCase, distanceBetweenWords, maxResults, ignoreWordsOrder, ignoreDiacritics ); } } // anonymous namespace /// makeDictionaries vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & fileNames, string const & indicesDir, Dictionary::Initializing & initializing, int maxPictureWidth, unsigned int maxHeadwordSize ) { vector< sptr< Dictionary::Class > > dictionaries; for( vector< string >::const_iterator i = fileNames.begin(); i != fileNames.end(); ++i ) { // Try .dsl and .dsl.dz suffixes bool uncompressedDsl = ( i->size() >= 4 && strcasecmp( i->c_str() + ( i->size() - 4 ), ".dsl" ) == 0 ); if ( !uncompressedDsl && ( i->size() < 7 || strcasecmp( i->c_str() + ( i->size() - 7 ), ".dsl.dz" ) != 0 ) ) continue; // Make sure it's not an abbreviation file int extSize = ( uncompressedDsl ? 4 : 7 ); if ( i->size() - extSize >= 5 && strncasecmp( i->c_str() + i->size() - extSize - 5, "_abrv", 5 ) == 0 ) { // It is, skip it continue; } unsigned atLine = 0; // Indicates current line in .dsl, for debug purposes try { vector< string > dictFiles( 1, *i ); // Check if there is an 'abrv' file present string baseName = ( (*i)[ i->size() - 4 ] == '.' ) ? string( *i, 0, i->size() - 4 ) : string( *i, 0, i->size() - 7 ); string abrvFileName; if ( File::tryPossibleName( baseName + "_abrv.dsl", abrvFileName ) || File::tryPossibleName( baseName + "_abrv.dsl.dz", abrvFileName ) || File::tryPossibleName( baseName + "_ABRV.DSL", abrvFileName ) || File::tryPossibleName( baseName + "_ABRV.DSL.DZ", abrvFileName ) || File::tryPossibleName( baseName + "_ABRV.DSL.dz", abrvFileName ) ) dictFiles.push_back( abrvFileName ); string dictId = Dictionary::makeDictionaryId( dictFiles ); // See if there's a zip file with resources present. If so, include it. string zipFileName; if ( File::tryPossibleZipName( baseName + ".dsl.files.zip", zipFileName ) || File::tryPossibleZipName( baseName + ".dsl.dz.files.zip", zipFileName ) || File::tryPossibleZipName( baseName + ".DSL.FILES.ZIP", zipFileName ) || File::tryPossibleZipName( baseName + ".DSL.DZ.FILES.ZIP", zipFileName ) ) dictFiles.push_back( zipFileName ); string indexFile = indicesDir + dictId; if ( Dictionary::needToRebuildIndex( dictFiles, indexFile ) || indexIsOldOrBad( indexFile, zipFileName.size() ) ) { DslScanner scanner( *i ); try { // Here we intercept any errors during the read to save line at // which the incident happened. We need alive scanner for that. if( scanner.getDictionaryName() == U"Abbrev" ) continue; // For now just skip abbreviations // Building the index initializing.indexingDictionary( Utf8::encode( scanner.getDictionaryName() ) ); gdDebug( "Dsl: Building the index for dictionary: %s\n", gd::toQString( scanner.getDictionaryName() ).toUtf8().data() ); File::Class idx( indexFile, "wb" ); IdxHeader idxHeader; memset( &idxHeader, 0, sizeof( idxHeader ) ); // We write a dummy header first. At the end of the process the header // will be rewritten with the right values. idx.write( idxHeader ); string dictionaryName = Utf8::encode( scanner.getDictionaryName() ); idx.write( (uint32_t)dictionaryName.size() ); idx.write( dictionaryName.data(), dictionaryName.size() ); string soundDictName = Utf8::encode( scanner.getSoundDictionaryName() ); if( !soundDictName.empty() ) { idxHeader.hasSoundDictionaryName = 1; idx.write( (uint32_t)soundDictName.size() ); idx.write( soundDictName.data(), soundDictName.size() ); } idxHeader.dslEncoding = scanner.getEncoding(); IndexedWords indexedWords; ChunkedStorage::Writer chunks( idx ); // Read the abbreviations if ( abrvFileName.size() ) { try { DslScanner abrvScanner( abrvFileName ); map< string, string > abrv; wstring curString; size_t curOffset; for( ; ; ) { // Skip any whitespace if ( !abrvScanner.readNextLineWithoutComments( curString, curOffset, true ) ) break; if ( curString.empty() || isDslWs( curString[ 0 ] ) ) continue; list< wstring > keys; bool eof = false; // Insert the key and read more, or get to the definition for( ; ; ) { processUnsortedParts( curString, true ); if ( keys.size() ) expandTildes( curString, keys.front() ); expandOptionalParts( curString, &keys ); if ( !abrvScanner.readNextLineWithoutComments( curString, curOffset ) || curString.empty() ) { gdWarning( "Premature end of file %s\n", abrvFileName.c_str() ); eof = true; break; } if ( isDslWs( curString[ 0 ] ) ) break; } if ( eof ) break; curString.erase( 0, curString.find_first_not_of( U" \t" ) ); if ( keys.size() ) expandTildes( curString, keys.front() ); // If the string has any dsl markup, we strip it string value = Utf8::encode( ArticleDom( curString ).root.renderAsText() ); for( list< wstring >::iterator i = keys.begin(); i != keys.end(); ++i ) { unescapeDsl( *i ); normalizeHeadword( *i ); abrv[ Utf8::encode( Folding::trimWhitespace( *i ) ) ] = value; } } idxHeader.hasAbrv = 1; idxHeader.abrvAddress = chunks.startNewBlock(); uint32_t sz = abrv.size(); chunks.addToBlock( &sz, sizeof( uint32_t ) ); for( map< string, string >::const_iterator i = abrv.begin(); i != abrv.end(); ++i ) { // GD_DPRINTF( "%s:%s\n", i->first.c_str(), i->second.c_str() ); sz = i->first.size(); chunks.addToBlock( &sz, sizeof( uint32_t ) ); chunks.addToBlock( i->first.data(), sz ); sz = i->second.size(); chunks.addToBlock( &sz, sizeof( uint32_t ) ); chunks.addToBlock( i->second.data(), sz ); } } catch( std::exception & e ) { gdWarning( "Error reading abrv file \"%s\", error: %s. Skipping it.\n", abrvFileName.c_str(), e.what() ); } } bool hasString = false; wstring curString; size_t curOffset; uint32_t articleCount = 0, wordCount = 0; for( ; ; ) { // Find the main headword if ( !hasString && !scanner.readNextLineWithoutComments( curString, curOffset, true) ) break; // Clean end of file hasString = false; // The line read should either consist of pure whitespace, or be a headword // skip too long headword,it can never be headword. if( curString.empty() || curString.size() > 100 ) continue; if ( isDslWs( curString[ 0 ] ) ) { // The first character is blank. Let's make sure that all other // characters are blank, too. for( size_t x = 1; x < curString.size(); ++x ) { if ( !isDslWs( curString[ x ] ) ) { gdWarning( "Garbage string in %s at offset 0x%lX\n", i->c_str(), (unsigned long) curOffset ); break; } } continue; } // Ok, got the headword list< wstring > allEntryWords; processUnsortedParts( curString, true ); expandOptionalParts( curString, &allEntryWords ); uint32_t articleOffset = curOffset; //GD_DPRINTF( "Headword: %ls\n", curString.c_str() ); // More headwords may follow for( ; ; ) { if ( ! ( hasString = scanner.readNextLineWithoutComments( curString, curOffset ) ) ) { gdWarning( "Premature end of file %s\n", i->c_str() ); break; } // Lingvo skips empty strings between the headwords if ( curString.empty() ) continue; if ( isDslWs( curString[ 0 ] ) ) break; // No more headwords #ifdef QT_DEBUG qDebug() << "Alt headword" << gd::toQString( curString ); #endif processUnsortedParts( curString, true ); expandTildes( curString, allEntryWords.front() ); expandOptionalParts( curString, &allEntryWords ); } if ( !hasString ) break; // Insert new entry uint32_t descOffset = chunks.startNewBlock(); chunks.addToBlock( &articleOffset, sizeof( articleOffset ) ); for( list< wstring >::iterator j = allEntryWords.begin(); j != allEntryWords.end(); ++j ) { unescapeDsl( *j ); normalizeHeadword( *j ); indexedWords.addWord( *j, descOffset, maxHeadwordSize ); } ++articleCount; wordCount += allEntryWords.size(); int insideInsided = 0; wstring headword; QVector< InsidedCard > insidedCards; uint32_t offset = curOffset; QVector< wstring > insidedHeadwords; unsigned linesInsideCard = 0; int dogLine = 0; bool wasEmptyLine = false; int headwordLine = scanner.getLinesRead() - 2; bool noSignificantLines = Folding::applyWhitespaceOnly( curString ).empty(); bool haveLine = !noSignificantLines; // Skip the article's body for( ; ; ) { hasString = haveLine ? true : scanner.readNextLineWithoutComments( curString, curOffset); haveLine = false; if ( !hasString || ( curString.size() && !isDslWs( curString[ 0 ] ) ) ) { if( insideInsided ) { gdWarning( "Unclosed tag '@' at line %i", dogLine ); insidedCards.append( InsidedCard( offset, curOffset - offset, insidedHeadwords ) ); } if( noSignificantLines ) gdWarning( "Orphan headword at line %i", headwordLine ); break; } // Check for orphan strings if( curString.empty() ) { wasEmptyLine = true; continue; } else { if( wasEmptyLine && !Folding::applyWhitespaceOnly( curString ).empty() ) gdWarning( "Orphan string at line %i", scanner.getLinesRead() - 1 ); } if( noSignificantLines ) noSignificantLines = Folding::applyWhitespaceOnly( curString ).empty(); // Find embedded cards wstring::size_type n = curString.find( L'@' ); if( n == wstring::npos || curString[ n - 1 ] == L'\\' ) { if( insideInsided ) linesInsideCard++; continue; } else { // Embedded card tag must be placed at first position in line after spaces if( !isAtSignFirst( curString ) ) { gdWarning( "Unescaped '@' symbol at line %i", scanner.getLinesRead() - 1 ); if( insideInsided ) linesInsideCard++; continue; } } dogLine = scanner.getLinesRead() - 1; // Handle embedded card if( insideInsided ) { if( linesInsideCard ) { insidedCards.append( InsidedCard( offset, curOffset - offset, insidedHeadwords ) ); insidedHeadwords.clear(); linesInsideCard = 0; offset = curOffset; } } else { offset = curOffset; linesInsideCard = 0; } headword = Folding::trimWhitespace( curString.substr( n + 1 ) ); if( !headword.empty() ) { processUnsortedParts( headword, true ); expandTildes( headword, allEntryWords.front() ); insidedHeadwords.append( headword ); insideInsided = true; } else insideInsided = false; } // Now that we're having read the first string after the article // itself, we can use its offset to calculate the article's size. // An end of file works here, too. uint32_t articleSize = ( curOffset - articleOffset ); chunks.addToBlock( &articleSize, sizeof( articleSize ) ); for( QVector< InsidedCard >::iterator i = insidedCards.begin(); i != insidedCards.end(); ++i ) { uint32_t descOffset = chunks.startNewBlock(); chunks.addToBlock( &(*i).offset, sizeof( (*i).offset ) ); chunks.addToBlock( &(*i).size, sizeof( (*i).size ) ); for( int x = 0; x < (*i).headwords.size(); x++ ) { allEntryWords.clear(); expandOptionalParts( (*i).headwords[ x ], &allEntryWords ); for( list< wstring >::iterator j = allEntryWords.begin(); j != allEntryWords.end(); ++j ) { unescapeDsl( *j ); normalizeHeadword( *j ); indexedWords.addWord( *j, descOffset, maxHeadwordSize ); } wordCount += allEntryWords.size(); } ++articleCount; } if ( !hasString ) break; } // Finish with the chunks idxHeader.chunksOffset = chunks.finish(); // Build index IndexInfo idxInfo = BtreeIndexing::buildIndex( indexedWords, idx ); idxHeader.indexBtreeMaxElements = idxInfo.btreeMaxElements; idxHeader.indexRootOffset = idxInfo.rootOffset; indexedWords.clear(); // Release memory -- no need for this data // If there was a zip file, index it too if ( zipFileName.size() ) { GD_DPRINTF( "Indexing zip file\n" ); idxHeader.hasZipFile = 1; IndexedWords zipFileNames; IndexedZip zipFile; if( zipFile.openZipFile( QDir::fromNativeSeparators( FsEncoding::decode( zipFileName.c_str() ) ) ) ) zipFile.indexFile( zipFileNames ); if( !zipFileNames.empty() ) { // Build the resulting zip file index IndexInfo idxInfo = BtreeIndexing::buildIndex( zipFileNames, idx ); idxHeader.zipIndexBtreeMaxElements = idxInfo.btreeMaxElements; idxHeader.zipIndexRootOffset = idxInfo.rootOffset; } else { // Bad zip file -- no index (though the mark that we have one // remains) idxHeader.zipIndexBtreeMaxElements = 0; idxHeader.zipIndexRootOffset = 0; } } else idxHeader.hasZipFile = 0; // That concludes it. Update the header. idxHeader.signature = Signature; idxHeader.formatVersion = CurrentFormatVersion; idxHeader.zipSupportVersion = CurrentZipSupportVersion; idxHeader.articleCount = articleCount; idxHeader.wordCount = wordCount; idxHeader.langFrom = dslLanguageToId( scanner.getLangFrom() ); idxHeader.langTo = dslLanguageToId( scanner.getLangTo() ); idx.rewind(); idx.write( &idxHeader, sizeof( idxHeader ) ); } // In-place try for saving line count catch( ... ) { atLine = scanner.getLinesRead(); throw; } } // if need to rebuild dictionaries.push_back( new DslDictionary( dictId, indexFile, dictFiles, maxPictureWidth ) ); } catch( std::exception & e ) { gdWarning( "DSL dictionary reading failed: %s:%u, error: %s\n", i->c_str(), atLine, e.what() ); } } return dictionaries; } }