goldendict-ng/zipsounds.cc
shenleban tongying 540dda26ed cleanup: Replace all usages of qrcx://localhost with qrc://
The replacement command:
git grep -l 'qrcx://localhost' | xargs sed -i 's/qrcx:\/\/localhost/qrc:\/\//g'

The qrcx:// URL scheme was introduced in 2009 or earlier - it is present
in the first commit in GoldenDict's git history. Back then GoldenDict
supported Qt versions earlier than 4.6, in which
QWebSecurityOrigin::addLocalScheme() was introduced. Adding the qrc URL
scheme as local obsoletes the qrcx URL scheme. GoldenDict does not
compile against Qt versions earlier than 4.6, so there is no reason to
use this custom URL scheme anymore.

Co-authored-by:  Igor Kushnir <igorkuo@gmail.com>
2023-03-05 15:20:05 -05:00

524 lines
15 KiB
C++

/* This file is (c) 2008-2012 Konstantin Isakov <ikm@goldendict.org>
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#include "zipsounds.hh"
#include "file.hh"
#include "folding.hh"
#include "utf8.hh"
#include "btreeidx.hh"
#include "fsencoding.hh"
#include "audiolink.hh"
#include "indexedzip.hh"
#include "filetype.hh"
#include "gddebug.hh"
#include "chunkedstorage.hh"
#include "htmlescape.hh"
#include <set>
#include <string>
#include <QFile>
#include <QDir>
#include <QDebug>
#ifdef _MSC_VER
#include <stub_msvc.h>
#endif
#include "utils.hh"
namespace ZipSounds {
using std::string;
using gd::wstring;
using std::map;
using std::multimap;
using std::set;
using BtreeIndexing::WordArticleLink;
using BtreeIndexing::IndexedWords;
using BtreeIndexing::IndexInfo;
namespace {
DEF_EX( exInvalidData, "Invalid data encountered", Dictionary::Ex )
enum
{
Signature = 0x5350495a, // ZIPS on little-endian, SPIZ on big-endian
CurrentFormatVersion = 6 + BtreeIndexing::FormatVersion
};
struct IdxHeader
{
uint32_t signature; // First comes the signature, ZIPS
uint32_t formatVersion; // File format version, currently 1.
uint32_t soundsCount; // Total number of sounds, for informative purposes only
uint32_t indexBtreeMaxElements; // Two fields from IndexInfo
uint32_t indexRootOffset;
uint32_t chunksOffset; // The offset to chunks' storage
}
#ifndef _MSC_VER
__attribute__((packed))
#endif
;
bool indexIsOldOrBad( string const & indexFile )
{
File::Class idx( indexFile, "rb" );
IdxHeader header;
return idx.readRecords( &header, sizeof( header ), 1 ) != 1 ||
header.signature != Signature ||
header.formatVersion != CurrentFormatVersion;
}
wstring stripExtension( string const & str )
{
wstring name;
try
{
name = Utf8::decode( str );
}
catch( Utf8::exCantDecode & )
{
return name;
}
if( Filetype::isNameOfSound( str ) )
{
wstring::size_type pos = name.rfind( L'.' );
if ( pos != wstring::npos )
name.erase( pos );
// Strip spaces at the end of name
string::size_type n = name.length();
while( n && name.at( n - 1 ) == L' ' )
n--;
if( n != name.length() )
name.erase( n );
}
return name;
}
class ZipSoundsDictionary: public BtreeIndexing::BtreeDictionary
{
Mutex idxMutex;
File::Class idx;
IdxHeader idxHeader;
sptr< ChunkedStorage::Reader > chunks;
IndexedZip zipsFile;
public:
ZipSoundsDictionary( string const & id, string const & indexFile,
vector< string > const & dictionaryFiles );
string getName() noexcept override;
map< Dictionary::Property, string > getProperties() noexcept override
{ return map< Dictionary::Property, string >(); }
unsigned long getArticleCount() noexcept override
{ return idxHeader.soundsCount; }
unsigned long getWordCount() noexcept override
{ return getArticleCount(); }
sptr< Dictionary::DataRequest > getArticle( wstring const &,
vector< wstring > const & alts,
wstring const &,
bool ignoreDiacritics ) override
;
sptr< Dictionary::DataRequest > getResource( string const & name ) override
;
protected:
void loadIcon() noexcept override;
};
ZipSoundsDictionary::ZipSoundsDictionary( string const & id,
string const & indexFile,
vector< string > const & dictionaryFiles ):
BtreeDictionary( id, dictionaryFiles ),
idx( indexFile, "rb" ),
idxHeader( idx.read< IdxHeader >() )
{
chunks = std::shared_ptr<ChunkedStorage::Reader>(new ChunkedStorage::Reader( idx, idxHeader.chunksOffset ));
// Initialize the index
openIndex( IndexInfo( idxHeader.indexBtreeMaxElements,
idxHeader.indexRootOffset ),
idx, idxMutex );
QString zipName = QDir::fromNativeSeparators(
FsEncoding::decode( getDictionaryFilenames()[ 0 ].c_str() ) );
zipsFile.openZipFile( zipName );
zipsFile.openIndex( IndexInfo( idxHeader.indexBtreeMaxElements,
idxHeader.indexRootOffset ),
idx, idxMutex );
}
string ZipSoundsDictionary::getName() noexcept
{
string result = FsEncoding::basename( getDictionaryFilenames()[ 0 ] );
// Strip the extension
result.erase( result.rfind( '.' ) );
return result;
}
sptr< Dictionary::DataRequest > ZipSoundsDictionary::getArticle( wstring const & word,
vector< wstring > const & alts,
wstring const &,
bool ignoreDiacritics )
{
vector< WordArticleLink > chain = findArticles( word, ignoreDiacritics );
for( unsigned x = 0; x < alts.size(); ++x )
{
/// Make an additional query for each alt
vector< WordArticleLink > altChain = findArticles( alts[ x ], ignoreDiacritics );
chain.insert( chain.end(), altChain.begin(), altChain.end() );
}
multimap< wstring, uint32_t > mainArticles, alternateArticles;
set< uint32_t > articlesIncluded; // Some synonims make it that the articles
// appear several times. We combat this
// by only allowing them to appear once.
wstring wordCaseFolded = Folding::applySimpleCaseOnly( word );
if( ignoreDiacritics )
wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded );
for( unsigned x = 0; x < chain.size(); ++x )
{
if ( articlesIncluded.find( chain[ x ].articleOffset ) != articlesIncluded.end() )
continue; // We already have this article in the body.
// Ok. Now, does it go to main articles, or to alternate ones? We list
// main ones first, and alternates after.
// We do the case-folded comparison here.
wstring headwordStripped =
Folding::applySimpleCaseOnly( Utf8::decode( chain[ x ].word ) );
if( ignoreDiacritics )
headwordStripped = Folding::applyDiacriticsOnly( headwordStripped );
multimap< wstring, uint32_t > & mapToUse =
( wordCaseFolded == headwordStripped ) ?
mainArticles : alternateArticles;
mapToUse.insert( std::pair< wstring, uint32_t >(
Folding::applySimpleCaseOnly( Utf8::decode( chain[ x ].word ) ), chain[ x ].articleOffset ) );
articlesIncluded.insert( chain[ x ].articleOffset );
}
if ( mainArticles.empty() && alternateArticles.empty() )
return std::make_shared<Dictionary::DataRequestInstant>( false ); // No such word
string result;
multimap< wstring, uint32_t >::const_iterator i;
result += "<table class=\"lsa_play\">";
vector< char > chunk;
char * nameBlock;
for( i = mainArticles.begin(); i != mainArticles.end(); ++i )
{
try
{
Mutex::Lock _( idxMutex );
nameBlock = chunks->getBlock( i->second, chunk );
if ( nameBlock >= &chunk.front() + chunk.size() )
{
// chunks reader thinks it's okay since zero-sized records can exist,
// but we don't allow that.
throw ChunkedStorage::exAddressOutOfRange();
}
}
catch( ChunkedStorage::exAddressOutOfRange & )
{
// Bad address
continue;
}
uint16_t sz;
memcpy( &sz, nameBlock, sizeof( uint16_t ) );
nameBlock += sizeof( uint16_t );
string name( nameBlock, sz );
nameBlock += sz;
string displayedName = mainArticles.size() + alternateArticles.size() > 1 ?
name : Utf8::encode( stripExtension( name ) );
result += "<tr>";
QUrl url;
url.setScheme( "gdau" );
url.setHost( QString::fromUtf8( getId().c_str() ) );
url.setPath( Utils::Url::ensureLeadingSlash( QString::fromUtf8( name.c_str() ) ) );
string ref = string( "\"" ) + url.toEncoded().data() + "\"";
result += addAudioLink( ref, getId() );
result += "<td><a href=" + ref + R"(><img src="qrc:///icons/playsound.png" border="0" alt="Play"/></a></td>)";
result += "<td><a href=" + ref + ">" + Html::escape( displayedName ) + "</a></td>";
result += "</tr>";
}
for( i = alternateArticles.begin(); i != alternateArticles.end(); ++i )
{
try
{
Mutex::Lock _( idxMutex );
nameBlock = chunks->getBlock( i->second, chunk );
if ( nameBlock >= &chunk.front() + chunk.size() )
{
// chunks reader thinks it's okay since zero-sized records can exist,
// but we don't allow that.
throw ChunkedStorage::exAddressOutOfRange();
}
}
catch( ChunkedStorage::exAddressOutOfRange & )
{
// Bad address
continue;
}
uint16_t sz;
memcpy( &sz, nameBlock, sizeof( uint16_t ) );
nameBlock += sizeof( uint16_t );
string name( nameBlock, sz );
nameBlock += sz;
string displayedName = mainArticles.size() + alternateArticles.size() > 1 ?
name : Utf8::encode( stripExtension( name ) );
result += "<tr>";
QUrl url;
url.setScheme( "gdau" );
url.setHost( QString::fromUtf8( getId().c_str() ) );
url.setPath( Utils::Url::ensureLeadingSlash( QString::fromUtf8( name.c_str() ) ) );
string ref = string( "\"" ) + url.toEncoded().data() + "\"";
result += addAudioLink( ref, getId() );
result += "<td><a href=" + ref + R"(><img src="qrc:///icons/playsound.png" border="0" alt="Play"/></a></td>)";
result += "<td><a href=" + ref + ">" + Html::escape( displayedName ) + "</a></td>";
result += "</tr>";
}
result += "</table>";
Dictionary::DataRequestInstant * ret =
new Dictionary::DataRequestInstant( true );
ret->getData().resize( result.size() );
memcpy( &(ret->getData().front()), result.data(), result.size() );
return std::shared_ptr<Dictionary::DataRequestInstant>(ret);
}
sptr< Dictionary::DataRequest > ZipSoundsDictionary::getResource( string const & name )
{
// Remove extension for sound files (like in sound dirs)
wstring strippedName = stripExtension( name );
vector< WordArticleLink > chain = findArticles( strippedName );
if ( chain.empty() )
return std::make_shared<Dictionary::DataRequestInstant>( false ); // No such resource
// Find sound
uint32_t dataOffset = 0;
for( int x = chain.size() - 1; x >= 0 ; x-- )
{
vector< char > chunk;
char * nameBlock = chunks->getBlock( chain[ x ].articleOffset, chunk );
uint16_t sz;
memcpy( &sz, nameBlock, sizeof( uint16_t ) );
nameBlock += sizeof( uint16_t );
string fileName( nameBlock, sz );
nameBlock += sz;
memcpy( &dataOffset, nameBlock, sizeof( uint32_t ) );
if( name.compare( fileName ) == 0 )
break;
}
sptr< Dictionary::DataRequestInstant > dr =
std::make_shared<Dictionary::DataRequestInstant>( true );
if ( zipsFile.loadFile( dataOffset, dr->getData() ) )
return dr;
return std::make_shared<Dictionary::DataRequestInstant>( false );
}
void ZipSoundsDictionary::loadIcon() noexcept
{
if ( dictionaryIconLoaded )
return;
QString fileName =
QDir::fromNativeSeparators( FsEncoding::decode( getDictionaryFilenames()[ 0 ].c_str() ) );
// Remove the extension
fileName.chop( 4 );
if( !loadIconFromFile( fileName ) )
{
// Load failed -- use default icons
dictionaryNativeIcon = dictionaryIcon = QIcon(":/icons/playsound_full.png");
}
dictionaryIconLoaded = true;
}
}
vector< sptr< Dictionary::Class > > makeDictionaries(
vector< string > const & fileNames,
string const & indicesDir,
Dictionary::Initializing & initializing )
{
(void) initializing;
vector< sptr< Dictionary::Class > > dictionaries;
for( vector< string >::const_iterator i = fileNames.begin(); i != fileNames.end();
++i )
{
/// Only allow .zips extension
if ( i->size() < 5 || strcasecmp( i->c_str() + ( i->size() - 5 ), ".zips" ) != 0 )
continue;
try
{
vector< string > dictFiles( 1, *i );
string dictId = Dictionary::makeDictionaryId( dictFiles );
string indexFile = indicesDir + dictId;
if ( Dictionary::needToRebuildIndex( dictFiles, indexFile ) ||
indexIsOldOrBad( indexFile ) )
{
gdDebug( "Zips: Building the index for dictionary: %s\n", i->c_str() );
File::Class idx( indexFile, "wb" );
IdxHeader idxHeader;
memset( &idxHeader, 0, sizeof( idxHeader ) );
// We write a dummy header first. At the end of the process the header
// will be rewritten with the right values.
idx.write( idxHeader );
IndexedWords names, zipFileNames;
ChunkedStorage::Writer chunks( idx );
quint32 namesCount;
IndexedZip zipFile;
if( zipFile.openZipFile( QDir::fromNativeSeparators(
FsEncoding::decode( i->c_str() ) ) ) )
zipFile.indexFile( zipFileNames, &namesCount );
if( !zipFileNames.empty() )
{
for( IndexedWords::iterator i = zipFileNames.begin(); i != zipFileNames.end(); ++i )
{
vector< WordArticleLink > links = i->second;
for( unsigned x = 0; x < links.size(); x++ )
{
// Save original name
uint32_t offset = chunks.startNewBlock();
uint16_t sz = links[ x ].word.size();
chunks.addToBlock( &sz, sizeof(uint16_t) );
chunks.addToBlock( links[ x ].word.c_str(), sz );
chunks.addToBlock( &links[ x ].articleOffset, sizeof( uint32_t ) );
// Remove extension for sound files (like in sound dirs)
wstring word = stripExtension( links[ x ].word );
if( !word.empty() )
names.addWord( word, offset );
}
}
// Finish with the chunks
idxHeader.chunksOffset = chunks.finish();
// Build the resulting zip file index
IndexInfo idxInfo = BtreeIndexing::buildIndex( names, idx );
// That concludes it. Update the header.
idxHeader.indexBtreeMaxElements = idxInfo.btreeMaxElements;
idxHeader.indexRootOffset = idxInfo.rootOffset;
idxHeader.signature = Signature;
idxHeader.formatVersion = CurrentFormatVersion;
idxHeader.soundsCount = namesCount;
idx.rewind();
idx.write( &idxHeader, sizeof( idxHeader ) );
}
else
{
idx.close();
QFile::remove( QDir::fromNativeSeparators(
FsEncoding::decode( indexFile.c_str() ) ) );
throw exInvalidData();
}
}
dictionaries.push_back( std::make_shared<ZipSoundsDictionary>( dictId,
indexFile,
dictFiles ) );
}
catch( std::exception & e )
{
gdWarning( "Zipped sounds pack reading failed: %s, error: %s\n",
i->c_str(), e.what() );
}
}
return dictionaries;
}
}