goldendict-ng/sounddir.cc
shenleban tongying 540dda26ed cleanup: Replace all usages of qrcx://localhost with qrc://
The replacement command:
git grep -l 'qrcx://localhost' | xargs sed -i 's/qrcx:\/\/localhost/qrc:\/\//g'

The qrcx:// URL scheme was introduced in 2009 or earlier - it is present
in the first commit in GoldenDict's git history. Back then GoldenDict
supported Qt versions earlier than 4.6, in which
QWebSecurityOrigin::addLocalScheme() was introduced. Adding the qrc URL
scheme as local obsoletes the qrcx URL scheme. GoldenDict does not
compile against Qt versions earlier than 4.6, so there is no reason to
use this custom URL scheme anymore.

Co-authored-by:  Igor Kushnir <igorkuo@gmail.com>
2023-03-05 15:20:05 -05:00

496 lines
14 KiB
C++

/* This file is (c) 2008-2012 Konstantin Isakov <ikm@goldendict.org>
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#include "sounddir.hh"
#include "file.hh"
#include "folding.hh"
#include "utf8.hh"
#include "btreeidx.hh"
#include "chunkedstorage.hh"
#include "filetype.hh"
#include "htmlescape.hh"
#include "audiolink.hh"
#include "wstring_qt.hh"
#include "fsencoding.hh"
#include "utils.hh"
#include <set>
#include <QDir>
#include <QFileInfo>
#include <QDebug>
namespace SoundDir {
using std::string;
using gd::wstring;
using std::map;
using std::multimap;
using std::set;
using BtreeIndexing::WordArticleLink;
using BtreeIndexing::IndexedWords;
using BtreeIndexing::IndexInfo;
namespace {
enum
{
Signature = 0x58524453, // SDRX on little-endian, XRDS on big-endian
CurrentFormatVersion = 1+ BtreeIndexing::FormatVersion + Folding::Version
};
struct IdxHeader
{
uint32_t signature; // First comes the signature, SDRX
uint32_t formatVersion; // File format version, is to be CurrentFormatVersion
uint32_t soundsCount; // Total number of sounds, for informative purposes only
uint32_t chunksOffset; // The offset to chunks' storage
uint32_t indexBtreeMaxElements; // Two fields from IndexInfo
uint32_t indexRootOffset;
}
#ifndef _MSC_VER
__attribute__((packed))
#endif
;
bool indexIsOldOrBad( string const & indexFile )
{
File::Class idx( indexFile, "rb" );
IdxHeader header;
return idx.readRecords( &header, sizeof( header ), 1 ) != 1 ||
header.signature != Signature ||
header.formatVersion != CurrentFormatVersion;
}
class SoundDirDictionary: public BtreeIndexing::BtreeDictionary
{
string name;
Mutex idxMutex;
File::Class idx;
IdxHeader idxHeader;
ChunkedStorage::Reader chunks;
QString iconFilename;
public:
SoundDirDictionary( string const & id, string const & name,
string const & indexFile,
vector< string > const & dictionaryFiles,
QString const & iconFilename_ );
string getName() noexcept override
{ return name; }
map< Dictionary::Property, string > getProperties() noexcept override
{ return map< Dictionary::Property, string >(); }
unsigned long getArticleCount() noexcept override
{ return idxHeader.soundsCount; }
unsigned long getWordCount() noexcept override
{ return getArticleCount(); }
sptr< Dictionary::DataRequest > getArticle( wstring const &,
vector< wstring > const & alts,
wstring const &,
bool ignoreDiacritics ) override
;
sptr< Dictionary::DataRequest > getResource( string const & name ) override
;
protected:
void loadIcon() noexcept override;
};
SoundDirDictionary::SoundDirDictionary( string const & id,
string const & name_,
string const & indexFile,
vector< string > const & dictionaryFiles,
QString const & iconFilename_ ):
BtreeDictionary( id, dictionaryFiles ),
name( name_ ),
idx( indexFile, "rb" ),
idxHeader( idx.read< IdxHeader >() ),
chunks( idx, idxHeader.chunksOffset ),
iconFilename( iconFilename_ )
{
// Initialize the index
openIndex( IndexInfo( idxHeader.indexBtreeMaxElements,
idxHeader.indexRootOffset ),
idx, idxMutex );
}
sptr< Dictionary::DataRequest > SoundDirDictionary::getArticle( wstring const & word,
vector< wstring > const & alts,
wstring const &,
bool ignoreDiacritics )
{
vector< WordArticleLink > chain = findArticles( word, ignoreDiacritics );
for( unsigned x = 0; x < alts.size(); ++x )
{
/// Make an additional query for each alt
vector< WordArticleLink > altChain = findArticles( alts[ x ], ignoreDiacritics );
chain.insert( chain.end(), altChain.begin(), altChain.end() );
}
// maps to the chain number
multimap< wstring, unsigned > mainArticles, alternateArticles;
set< uint32_t > articlesIncluded; // Some synonims make it that the articles
// appear several times. We combat this
// by only allowing them to appear once.
wstring wordCaseFolded = Folding::applySimpleCaseOnly( word );
if( ignoreDiacritics )
wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded );
for( unsigned x = 0; x < chain.size(); ++x )
{
if ( articlesIncluded.find( chain[ x ].articleOffset ) != articlesIncluded.end() )
continue; // We already have this article in the body.
// Ok. Now, does it go to main articles, or to alternate ones? We list
// main ones first, and alternates after.
// We do the case-folded comparison here.
wstring headwordStripped =
Folding::applySimpleCaseOnly( Utf8::decode( chain[ x ].word ) );
if( ignoreDiacritics )
headwordStripped = Folding::applyDiacriticsOnly( headwordStripped );
multimap< wstring, unsigned > & mapToUse =
( wordCaseFolded == headwordStripped ) ?
mainArticles : alternateArticles;
mapToUse.insert( std::pair< wstring, uint32_t >(
Folding::applySimpleCaseOnly( Utf8::decode( chain[ x ].word ) ), x ) );
articlesIncluded.insert( chain[ x ].articleOffset );
}
if ( mainArticles.empty() && alternateArticles.empty() )
return std::make_shared<Dictionary::DataRequestInstant>( false ); // No such word
string result;
multimap< wstring, uint32_t >::const_iterator i;
string displayedName;
vector< char > chunk;
char * nameBlock;
result += "<table class=\"lsa_play\">";
for( i = mainArticles.begin(); i != mainArticles.end(); ++i )
{
if( mainArticles.size() + alternateArticles.size() <= 1 )
displayedName = chain[ i->second ].word;
else
{
try
{
Mutex::Lock _( idxMutex );
nameBlock = chunks.getBlock( chain[ i->second ].articleOffset, chunk );
if ( nameBlock >= &chunk.front() + chunk.size() )
{
// chunks reader thinks it's okay since zero-sized records can exist,
// but we don't allow that.
throw ChunkedStorage::exAddressOutOfRange();
}
chunk.back() = 0; // It must end with 0 anyway, but just in case
displayedName = string( nameBlock );
}
catch( ChunkedStorage::exAddressOutOfRange & )
{
// Bad address
continue;
}
}
result += "<tr>";
QUrl url;
url.setScheme( "gdau" );
url.setHost( QString::fromUtf8( getId().c_str() ) );
url.setPath( Utils::Url::ensureLeadingSlash( QString::number( chain[ i->second ].articleOffset ) ) );
string ref = string( "\"" ) + url.toEncoded().data() + "\"";
result += addAudioLink( ref, getId() );
result += "<td><a href=" + ref + R"(><img src="qrc:///icons/playsound.png" border="0" alt="Play"/></a></td>)";
result += "<td><a href=" + ref + ">" + Html::escape( displayedName ) + "</a></td>";
result += "</tr>";
}
for( i = alternateArticles.begin(); i != alternateArticles.end(); ++i )
{
if( mainArticles.size() + alternateArticles.size() <= 1 )
displayedName = chain[ i->second ].word;
else
{
try
{
Mutex::Lock _( idxMutex );
nameBlock = chunks.getBlock( chain[ i->second ].articleOffset, chunk );
if ( nameBlock >= &chunk.front() + chunk.size() )
{
// chunks reader thinks it's okay since zero-sized records can exist,
// but we don't allow that.
throw ChunkedStorage::exAddressOutOfRange();
}
chunk.back() = 0; // It must end with 0 anyway, but just in case
displayedName = string( nameBlock );
}
catch( ChunkedStorage::exAddressOutOfRange & )
{
// Bad address
continue;
}
}
result += "<tr>";
QUrl url;
url.setScheme( "gdau" );
url.setHost( QString::fromUtf8( getId().c_str() ) );
url.setPath( Utils::Url::ensureLeadingSlash( QString::number( chain[ i->second ].articleOffset ) ) );
string ref = string( "\"" ) + url.toEncoded().data() + "\"";
result += addAudioLink( ref, getId() );
result += "<td><a href=" + ref + R"(><img src="qrc:///icons/playsound.png" border="0" alt="Play"/></a></td>)";
result += "<td><a href=" + ref + ">" + Html::escape( displayedName ) + "</a></td>";
result += "</tr>";
}
result += "</table>";
Dictionary::DataRequestInstant * ret =
new Dictionary::DataRequestInstant( true );
ret->getData().resize( result.size() );
memcpy( &(ret->getData().front()), result.data(), result.size() );
return std::shared_ptr<Dictionary::DataRequestInstant>(ret);
}
void SoundDirDictionary::loadIcon() noexcept
{
if ( dictionaryIconLoaded )
return;
if( !iconFilename.isEmpty() )
{
QFileInfo fInfo( QDir( Config::getConfigDir() ), iconFilename );
if( fInfo.isFile() )
loadIconFromFile( fInfo.absoluteFilePath(), true );
}
if( dictionaryIcon.isNull() )
dictionaryIcon = dictionaryNativeIcon = QIcon(":/icons/playsound_full.png");
dictionaryIconLoaded = true;
}
sptr< Dictionary::DataRequest > SoundDirDictionary::getResource( string const & name )
{
bool isNumber = false;
uint32_t articleOffset = QString::fromUtf8( name.c_str() ).toULong( &isNumber );
if ( !isNumber )
return std::make_shared<Dictionary::DataRequestInstant>( false ); // No such resource
vector< char > chunk;
char * articleData;
try
{
Mutex::Lock _( idxMutex );
articleData = chunks.getBlock( articleOffset, chunk );
if ( articleData >= &chunk.front() + chunk.size() )
{
// chunks reader thinks it's okay since zero-sized records can exist,
// but we don't allow that.
throw ChunkedStorage::exAddressOutOfRange();
}
}
catch( ChunkedStorage::exAddressOutOfRange & )
{
// Bad address
return std::make_shared<Dictionary::DataRequestInstant>( false ); // No such resource
}
chunk.back() = 0; // It must end with 0 anyway, but just in case
QDir dir( QDir::fromNativeSeparators( FsEncoding::decode( getDictionaryFilenames()[ 0 ].c_str() ) ) );
QString fileName = QDir::toNativeSeparators( dir.filePath( QString::fromUtf8( articleData ) ) );
// Now try loading that file
try
{
File::Class f( FsEncoding::encode( fileName ), "rb" );
sptr< Dictionary::DataRequestInstant > dr =
std::make_shared<Dictionary::DataRequestInstant>( true );
vector< char > & data = dr->getData();
f.seekEnd();
data.resize( f.tell() );
f.rewind();
f.read( &data.front(), data.size() );
return dr;
}
catch( File::Ex & )
{
return std::make_shared<Dictionary::DataRequestInstant>( false ); // No such resource
}
}
void addDir( QDir const & baseDir, QDir const & dir, IndexedWords & indexedWords,
uint32_t & soundsCount, ChunkedStorage::Writer & chunks )
{
QFileInfoList entries = dir.entryInfoList( QDir::Dirs | QDir::Files | QDir::NoDotAndDotDot );
for( QFileInfoList::const_iterator i = entries.constBegin();
i != entries.constEnd(); ++i )
{
if ( i->isDir() )
addDir( baseDir, QDir( i->absoluteFilePath() ), indexedWords, soundsCount, chunks );
else
if ( Filetype::isNameOfSound( i->fileName().toUtf8().data() ) )
{
// Add this sound to index
string fileName = baseDir.relativeFilePath( i->filePath() ).toUtf8().data();
uint32_t articleOffset = chunks.startNewBlock();
chunks.addToBlock( fileName.c_str(), fileName.size() + 1 );
wstring name = gd::toWString( i->fileName() );
wstring::size_type pos = name.rfind( L'.' );
if ( pos != wstring::npos )
name.erase( pos );
indexedWords.addWord( name, articleOffset );
++soundsCount;
}
}
}
}
vector< sptr< Dictionary::Class > > makeDictionaries( Config::SoundDirs const & soundDirs,
string const & indicesDir,
Dictionary::Initializing & initializing )
{
vector< sptr< Dictionary::Class > > dictionaries;
for( Config::SoundDirs::const_iterator i = soundDirs.begin(); i != soundDirs.end();
++i )
{
QDir dir( i->path );
if ( !dir.exists() )
continue; // No such dir, no dictionary then
vector< string > dictFiles( 1,
FsEncoding::encode( QDir::toNativeSeparators( dir.canonicalPath() ) ) );
dictFiles.push_back( "SoundDir" ); // A mixin
string dictId = Dictionary::makeDictionaryId( dictFiles );
dictFiles.pop_back(); // Remove mixin
string indexFile = indicesDir + dictId;
if ( Dictionary::needToRebuildIndex( dictFiles, indexFile ) || indexIsOldOrBad( indexFile ) )
{
// Building the index
qDebug() << "Sounds: Building the index for directory: " << i->path;
initializing.indexingDictionary( i->name.toUtf8().data() );
File::Class idx( indexFile, "wb" );
IdxHeader idxHeader;
memset( &idxHeader, 0, sizeof( idxHeader ) );
// We write a dummy header first. At the end of the process the header
// will be rewritten with the right values.
idx.write( idxHeader );
IndexedWords indexedWords;
ChunkedStorage::Writer chunks( idx );
uint32_t soundsCount = 0; // Header's one is packed, we can't ref it
addDir( dir, dir, indexedWords, soundsCount, chunks );
idxHeader.soundsCount = soundsCount;
// Finish with the chunks
idxHeader.chunksOffset = chunks.finish();
// Build the index
IndexInfo idxInfo = BtreeIndexing::buildIndex( indexedWords, idx );
idxHeader.indexBtreeMaxElements = idxInfo.btreeMaxElements;
idxHeader.indexRootOffset = idxInfo.rootOffset;
// That concludes it. Update the header.
idxHeader.signature = Signature;
idxHeader.formatVersion = CurrentFormatVersion;
idx.rewind();
idx.write( &idxHeader, sizeof( idxHeader ) );
}
dictionaries.push_back( std::make_shared<SoundDirDictionary>( dictId,
i->name.toUtf8().data(),
indexFile,
dictFiles,
i->iconFilename ) );
}
return dictionaries;
}
}